xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 95dbaa6faf01fdfd99114b7c9e5668e4b2aa754d)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
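/*
   A minimal usage sketch of the recommendation above (illustrative only; the
   preallocation counts and the global sizes M and N are placeholders, not values
   taken from this file):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         (takes effect on a one-process communicator)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  (takes effect on a multi-process communicator)
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/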
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
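/*
   A hedged usage sketch (names are illustrative, not taken from this file): gseq is a
   square sequential AIJ matrix whose structure and values are read only on rank 0, and
   mlocal is the number of rows this process is to own.

     Mat Adist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
     ... later, after the numerical values of gseq change but not its nonzero pattern ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/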
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the number of diagonal and off-diagonal entries in each row */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the number of diagonal and off-diagonal entries in each row */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0*/
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it, it is not scalable (each processor
406 has an order N integer array, but it is fast to access).
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
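/*
   Sketch of how the colmap built above is consulted (it mirrors the lookup in
   MatSetValues_MPIAIJ() below; gcol stands for a hypothetical global column index).
   Entries are stored shifted by +1 so that a lookup result of 0, i.e. col == -1
   after shifting back, means the column has no slot in the off-diagonal block:

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
       col--;
     #else
       col = aij->colmap[gcol] - 1;
     #endif
     if (col < 0) { ... gcol is not present in the off-diagonal block ... }
*/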
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
638     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
678     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
679     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
681 */
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
690   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt       *ailen = a->ilen,*aj = a->j;
692   PetscInt       *bilen = b->ilen,*bj = b->j;
693   PetscInt       am     = aij->A->rmap->n,j;
694   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
695   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
696   PetscScalar    *aa = a->a,*ba = b->a;
697 
698   PetscFunctionBegin;
699   /* Iterate over all rows of the matrix */
700   for (j=0; j<am; j++) {
701     dnz_row = onz_row = 0;
702     rowstart_offd = full_offd_i[j];
703     rowstart_diag = full_diag_i[j];
704     /*  Iterate over all non-zero columns of the current row */
705     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
706       /* If column is in the diagonal */
707       if (mat_j[col] >= cstart && mat_j[col] < cend) {
708         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
709         aa[rowstart_diag+dnz_row] = mat_a[col];
710         dnz_row++;
711       } else { /* off-diagonal entries */
712         bj[rowstart_offd+onz_row] = mat_j[col];
713         ba[rowstart_offd+onz_row] = mat_a[col];
714         onz_row++;
715       }
716     }
717     ailen[j] = dnz_row;
718     bilen[j] = onz_row;
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
724 {
725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
726   PetscErrorCode ierr;
727   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
728   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
729 
730   PetscFunctionBegin;
731   for (i=0; i<m; i++) {
732     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
733     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
734     if (idxm[i] >= rstart && idxm[i] < rend) {
735       row = idxm[i] - rstart;
736       for (j=0; j<n; j++) {
737         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
738         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
739         if (idxn[j] >= cstart && idxn[j] < cend) {
740           col  = idxn[j] - cstart;
741           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
742         } else {
743           if (!aij->colmap) {
744             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
745           }
746 #if defined(PETSC_USE_CTABLE)
747           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
748           col--;
749 #else
750           col = aij->colmap[idxn[j]] - 1;
751 #endif
752           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
753           else {
754             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
755           }
756         }
757       }
758     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
781 {
782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
783   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
784   PetscErrorCode ierr;
785   PetscMPIInt    n;
786   PetscInt       i,j,rstart,ncols,flg;
787   PetscInt       *row,*col;
788   PetscBool      other_disassembled;
789   PetscScalar    *val;
790 
791   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
792 
793   PetscFunctionBegin;
794   if (!aij->donotstash && !mat->nooffprocentries) {
795     while (1) {
796       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
797       if (!flg) break;
798 
799       for (i=0; i<n; ) {
800         /* Now identify the consecutive vals belonging to the same row */
801         for (j=i,rstart=row[j]; j<n; j++) {
802           if (row[j] != rstart) break;
803         }
804         if (j < n) ncols = j-i;
805         else       ncols = n-i;
806         /* Now assemble all these values with a single function call */
807         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
808 
809         i = j;
810       }
811     }
812     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
813   }
814   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled; if so, we must
818      also disassemble ourselves so that we may reassemble. */
819   /*
820      if nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled thus we can skip this stuff
822   */
823   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
830     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
831   }
832   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
833   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
837 
838   aij->rowvalues = 0;
839 
840   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
841   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
842 
843   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
844   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
845     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
865   PetscInt      *lrows;
866   PetscInt       r, len;
867   PetscBool      cong;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
873   /* fix right hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
880     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
881     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
882     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
883   }
884   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
885   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
886   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
887   if ((diag != 0.0) && cong) {
888     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
889   } else if (diag != 0.0) {
890     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
891     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
892     for (r = 0; r < len; ++r) {
893       const PetscInt row = lrows[r] + A->rmap->rstart;
894       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
895     }
896     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898   } else {
899     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
900   }
901   ierr = PetscFree(lrows);CHKERRQ(ierr);
902 
903   /* only change matrix nonzero state if pattern was allowed to be changed */
904   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
905     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
906     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
914   PetscErrorCode    ierr;
915   PetscMPIInt       n = A->rmap->n;
916   PetscInt          i,j,r,m,p = 0,len = 0;
917   PetscInt          *lrows,*owners = A->rmap->range;
918   PetscSFNode       *rrows;
919   PetscSF           sf;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 
927   PetscFunctionBegin;
928   /* Create SF where leaves are input rows and roots are owned rows */
929   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
930   for (r = 0; r < n; ++r) lrows[r] = -1;
931   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
932   for (r = 0; r < N; ++r) {
933     const PetscInt idx   = rows[r];
934     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
935     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
936       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
937     }
938     rrows[r].rank  = p;
939     rrows[r].index = rows[r] - owners[p];
940   }
941   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
942   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
943   /* Collect flags for rows to be zeroed */
944   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
945   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
947   /* Compress and put in row numbers */
948   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
949   /* zero diagonal part of matrix */
950   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
951   /* handle off diagonal part of matrix */
952   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
953   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
954   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
955   for (i=0; i<len; i++) bb[lrows[i]] = 1;
956   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
957   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
960   if (x) {
961     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
964     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
965   }
966   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
967   /* remove zeroed rows of off diagonal matrix */
968   ii = aij->i;
969   for (i=0; i<len; i++) {
970     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
971   }
972   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
973   if (aij->compressedrow.use) {
974     m    = aij->compressedrow.nrows;
975     ii   = aij->compressedrow.i;
976     ridx = aij->compressedrow.rindex;
977     for (i=0; i<m; i++) {
978       n  = ii[i+1] - ii[i];
979       aj = aij->j + ii[i];
980       aa = aij->a + ii[i];
981 
982       for (j=0; j<n; j++) {
983         if (PetscAbsScalar(mask[*aj])) {
984           if (b) bb[*ridx] -= *aa*xx[*aj];
985           *aa = 0.0;
986         }
987         aa++;
988         aj++;
989       }
990       ridx++;
991     }
992   } else { /* do not use compressed row format */
993     m = l->B->rmap->n;
994     for (i=0; i<m; i++) {
995       n  = ii[i+1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij->a + ii[i];
998       for (j=0; j<n; j++) {
999         if (PetscAbsScalar(mask[*aj])) {
1000           if (b) bb[i] -= *aa*xx[*aj];
1001           *aa = 0.0;
1002         }
1003         aa++;
1004         aj++;
1005       }
1006     }
1007   }
1008   if (x) {
1009     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1010     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011   }
1012   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1013   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1014   ierr = PetscFree(lrows);CHKERRQ(ierr);
1015 
1016   /* only change matrix nonzero state if pattern was allowed to be changed */
1017   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1018     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1019     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscBool      merged;
1072 
1073   PetscFunctionBegin;
1074   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1075   /* do nondiagonal part */
1076   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1077   if (!merged) {
1078     /* send it on its way */
1079     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080     /* do local part */
1081     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1082     /* receive remote parts: note this assumes the values are not actually */
1083     /* added into yy until the next line */
1084     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   } else {
1086     /* do local part */
1087     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1088     /* send it on its way */
1089     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090     /* values actually were received in the Begin() but we need to call this nop */
1091     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1097 {
1098   MPI_Comm       comm;
1099   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1100   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1101   IS             Me,Notme;
1102   PetscErrorCode ierr;
1103   PetscInt       M,N,first,last,*notme,i;
1104   PetscBool      lf;
1105   PetscMPIInt    size;
1106 
1107   PetscFunctionBegin;
1108   /* Easy test: check that the diagonal blocks are transposes of each other */
1109   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1110   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1111   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1112   if (!*f) PetscFunctionReturn(0);
1113   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1114   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1115   if (size == 1) PetscFunctionReturn(0);
1116 
1117   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1118   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1119   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1120   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1121   for (i=0; i<first; i++) notme[i] = i;
1122   for (i=last; i<M; i++) notme[i-last+first] = i;
1123   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1124   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1125   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1126   Aoff = Aoffs[0];
1127   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1128   Boff = Boffs[0];
1129   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1130   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1131   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1132   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1133   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1134   ierr = PetscFree(notme);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1139 {
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   /* do nondiagonal part */
1154   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1155   /* send it on its way */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   /* do local part */
1158   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1159   /* receive remote parts */
1160   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 /*
1165   This only works correctly for square matrices where the subblock A->A is the
1166    diagonal block
1167 */
1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1169 {
1170   PetscErrorCode ierr;
1171   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1172 
1173   PetscFunctionBegin;
1174   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1175   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1176   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1181 {
1182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1187   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1192 {
1193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197 #if defined(PETSC_USE_LOG)
1198   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1199 #endif
1200   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1201   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1202   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1203   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1204 #if defined(PETSC_USE_CTABLE)
1205   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1206 #else
1207   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1208 #endif
1209   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1210   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1211   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1212   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1213   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1214   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1215   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1226 #if defined(PETSC_HAVE_ELEMENTAL)
1227   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1228 #endif
1229 #if defined(PETSC_HAVE_HYPRE)
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1232 #endif
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1239 {
1240   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1241   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1242   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1243   PetscErrorCode ierr;
1244   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1245   int            fd;
1246   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1247   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1248   PetscScalar    *column_values;
1249   PetscInt       message_count,flowcontrolcount;
1250   FILE           *file;
1251 
1252   PetscFunctionBegin;
1253   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1254   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1255   nz   = A->nz + B->nz;
1256   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1257   if (!rank) {
1258     header[0] = MAT_FILE_CLASSID;
1259     header[1] = mat->rmap->N;
1260     header[2] = mat->cmap->N;
1261 
1262     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     /* get largest number of rows any processor has */
1265     rlen  = mat->rmap->n;
1266     range = mat->rmap->range;
1267     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1268   } else {
1269     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     rlen = mat->rmap->n;
1271   }
1272 
1273   /* load up the local row counts */
1274   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1275   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1276 
1277   /* store the row lengths to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1283       rlen = range[i+1] - range[i];
1284       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1290     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1291     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1292   }
1293   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1294 
1295   /* load up the local column indices */
1296   nzmax = nz; /* the root process needs enough space to hold the largest number of nonzeros on any one process */
1297   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1299   cnt   = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ((col = garray[B->j[j]]) > cstart) break;
1303       column_indices[cnt++] = col;
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1306     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1307   }
1308   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1309 
1310   /* store the column indices to the file */
1311   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1312   if (!rank) {
1313     MPI_Status status;
1314     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     for (i=1; i<size; i++) {
1316       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1317       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1318       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1319       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1320       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1321     }
1322     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1323   } else {
1324     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1325     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1326     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1327     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1328   }
1329   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1330 
1331   /* load up the local column values */
1332   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1333   cnt  = 0;
1334   for (i=0; i<mat->rmap->n; i++) {
1335     for (j=B->i[i]; j<B->i[i+1]; j++) {
1336       if (garray[B->j[j]] > cstart) break;
1337       column_values[cnt++] = B->a[j];
1338     }
1339     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1340     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1341   }
1342   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1343 
1344   /* store the column values to the file */
1345   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1346   if (!rank) {
1347     MPI_Status status;
1348     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1349     for (i=1; i<size; i++) {
1350       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1351       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1352       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1353       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1354       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1355     }
1356     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1357   } else {
1358     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1359     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1360     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1361     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1362   }
1363   ierr = PetscFree(column_values);CHKERRQ(ierr);
1364 
1365   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1366   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #include <petscdraw.h>
1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1372 {
1373   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1374   PetscErrorCode    ierr;
1375   PetscMPIInt       rank = aij->rank,size = aij->size;
1376   PetscBool         isdraw,iascii,isbinary;
1377   PetscViewer       sviewer;
1378   PetscViewerFormat format;
1379 
1380   PetscFunctionBegin;
1381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1382   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1383   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1384   if (iascii) {
1385     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1386     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1387       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1388       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1389       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390       for (i=0; i<(PetscInt)size; i++) {
1391         nmax = PetscMax(nmax,nz[i]);
1392         nmin = PetscMin(nmin,nz[i]);
1393         navg += nz[i];
1394       }
1395       ierr = PetscFree(nz);CHKERRQ(ierr);
1396       navg = navg/size;
1397       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1398       PetscFunctionReturn(0);
1399     }
1400     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1401     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1402       MatInfo   info;
1403       PetscBool inodes;
1404 
1405       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1406       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1407       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1408       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1409       if (!inodes) {
1410         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1411                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1412       } else {
1413         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1414                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1415       }
1416       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1418       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1420       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1421       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1422       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1423       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1424       PetscFunctionReturn(0);
1425     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1426       PetscInt inodecount,inodelimit,*inodes;
1427       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1428       if (inodes) {
1429         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1430       } else {
1431         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1432       }
1433       PetscFunctionReturn(0);
1434     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1435       PetscFunctionReturn(0);
1436     }
1437   } else if (isbinary) {
1438     if (size == 1) {
1439       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1440       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1441     } else {
1442       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1443     }
1444     PetscFunctionReturn(0);
1445   } else if (isdraw) {
1446     PetscDraw draw;
1447     PetscBool isnull;
1448     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1449     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1450     if (isnull) PetscFunctionReturn(0);
1451   }
1452 
1453   {
1454     /* assemble the entire matrix onto first processor. */
1455     Mat        A;
1456     Mat_SeqAIJ *Aloc;
1457     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1458     MatScalar  *a;
1459 
1460     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1461     if (!rank) {
1462       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1463     } else {
1464       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1465     }
1466     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1467     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1468     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1469     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1470     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1471 
1472     /* copy over the A part */
1473     Aloc = (Mat_SeqAIJ*)aij->A->data;
1474     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475     row  = mat->rmap->rstart;
1476     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1477     for (i=0; i<m; i++) {
1478       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1479       row++;
1480       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1481     }
1482     aj = Aloc->j;
1483     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1484 
1485     /* copy over the B part */
1486     Aloc = (Mat_SeqAIJ*)aij->B->data;
1487     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1488     row  = mat->rmap->rstart;
1489     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1490     ct   = cols;
1491     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1492     for (i=0; i<m; i++) {
1493       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1494       row++;
1495       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1496     }
1497     ierr = PetscFree(ct);CHKERRQ(ierr);
1498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1500     /*
1501        Everyone has to participate in the calls below that draw the matrix, since the graphics waits are
1502        synchronized across all processes that share the PetscDraw object
1503     */
1504     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     if (!rank) {
1506       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1508     }
1509     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1510     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1511     ierr = MatDestroy(&A);CHKERRQ(ierr);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1517 {
1518   PetscErrorCode ierr;
1519   PetscBool      iascii,isdraw,issocket,isbinary;
1520 
1521   PetscFunctionBegin;
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1524   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1526   if (iascii || isdraw || isbinary || issocket) {
1527     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1533 {
1534   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1535   PetscErrorCode ierr;
1536   Vec            bb1 = 0;
1537   PetscBool      hasop;
1538 
1539   PetscFunctionBegin;
1540   if (flag == SOR_APPLY_UPPER) {
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1542     PetscFunctionReturn(0);
1543   }
1544 
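  /* A work vector bb1 is needed whenever the ghost values of xx contribute to the local right-hand side:
     more than one iteration, a nonzero initial guess (~flag & SOR_ZERO_INITIAL_GUESS means the flag is NOT set),
     or the Eisenstat trick */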
1545   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1546     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1547   }
1548 
1549   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554 
1555     while (its--) {
1556       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558 
1559       /* update rhs: bb1 = bb - B*x */
1560       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1561       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1562 
1563       /* local sweep */
1564       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1565     }
1566   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1567     if (flag & SOR_ZERO_INITIAL_GUESS) {
1568       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1569       its--;
1570     }
1571     while (its--) {
1572       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574 
1575       /* update rhs: bb1 = bb - B*x */
1576       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1577       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1578 
1579       /* local sweep */
1580       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1581     }
1582   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1583     if (flag & SOR_ZERO_INITIAL_GUESS) {
1584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1585       its--;
1586     }
1587     while (its--) {
1588       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590 
1591       /* update rhs: bb1 = bb - B*x */
1592       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1593       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1594 
1595       /* local sweep */
1596       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1597     }
1598   } else if (flag & SOR_EISENSTAT) {
1599     Vec xx1;
1600 
1601     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1603 
1604     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     if (!mat->diag) {
1607       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1608       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1609     }
1610     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1611     if (hasop) {
1612       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1613     } else {
1614       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1615     }
1616     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1617 
1618     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1619 
1620     /* local sweep */
1621     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1622     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1623     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1624   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1625 
1626   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1627 
1628   matin->factorerrortype = mat->A->factorerrortype;
1629   PetscFunctionReturn(0);
1630 }
1631 
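/*
   Permutes rows and columns of a parallel AIJ matrix. PetscSF reductions invert the row and column
   permutations to determine where each local row, local column, and ghost column should go; the
   resulting nonzero counts are broadcast back to preallocate Aperm, and the entries are then inserted
   with MatSetValues in batches of at most m columns.
*/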
1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1633 {
1634   Mat            aA,aB,Aperm;
1635   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1636   PetscScalar    *aa,*ba;
1637   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1638   PetscSF        rowsf,sf;
1639   IS             parcolp = NULL;
1640   PetscBool      done;
1641   PetscErrorCode ierr;
1642 
1643   PetscFunctionBegin;
1644   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1645   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1646   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1647   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1648 
1649   /* Invert row permutation to find out where my rows should go */
1650   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1651   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1652   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1654   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1655   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1656 
1657   /* Invert column permutation to find out where my columns should go */
1658   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1659   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1660   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1661   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1662   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1663   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1665 
1666   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1667   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1668   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1669 
1670   /* Find out where my gcols should go */
1671   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1672   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1674   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1675   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1676   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1677   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1678   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1679 
1680   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1681   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1682   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1683   for (i=0; i<m; i++) {
1684     PetscInt row = rdest[i],rowner;
1685     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1686     for (j=ai[i]; j<ai[i+1]; j++) {
1687       PetscInt cowner,col = cdest[aj[j]];
1688       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1689       if (rowner == cowner) dnnz[i]++;
1690       else onnz[i]++;
1691     }
1692     for (j=bi[i]; j<bi[i+1]; j++) {
1693       PetscInt cowner,col = gcdest[bj[j]];
1694       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1695       if (rowner == cowner) dnnz[i]++;
1696       else onnz[i]++;
1697     }
1698   }
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1701   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1702   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1704 
1705   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1706   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1707   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1708   for (i=0; i<m; i++) {
1709     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1710     PetscInt j0,rowlen;
1711     rowlen = ai[i+1] - ai[i];
1712     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert the values in batches of at most m */
1713       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1714       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1715     }
1716     rowlen = bi[i+1] - bi[i];
1717     for (j0=j=0; j<rowlen; j0=j) {
1718       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1719       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1720     }
1721   }
1722   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1725   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1726   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1727   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1728   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1729   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1730   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1731   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1732   *B = Aperm;
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1737 {
1738   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1743   if (ghosts) *ghosts = aij->garray;
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   Mat            A    = mat->A,B = mat->B;
1751   PetscErrorCode ierr;
1752   PetscReal      isend[5],irecv[5];
1753 
1754   PetscFunctionBegin;
1755   info->block_size = 1.0;
1756   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1757 
1758   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1759   isend[3] = info->memory;  isend[4] = info->mallocs;
1760 
1761   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1762 
1763   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764   isend[3] += info->memory;  isend[4] += info->mallocs;
1765   if (flag == MAT_LOCAL) {
1766     info->nz_used      = isend[0];
1767     info->nz_allocated = isend[1];
1768     info->nz_unneeded  = isend[2];
1769     info->memory       = isend[3];
1770     info->mallocs      = isend[4];
1771   } else if (flag == MAT_GLOBAL_MAX) {
1772     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1773 
1774     info->nz_used      = irecv[0];
1775     info->nz_allocated = irecv[1];
1776     info->nz_unneeded  = irecv[2];
1777     info->memory       = irecv[3];
1778     info->mallocs      = irecv[4];
1779   } else if (flag == MAT_GLOBAL_SUM) {
1780     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1781 
1782     info->nz_used      = irecv[0];
1783     info->nz_allocated = irecv[1];
1784     info->nz_unneeded  = irecv[2];
1785     info->memory       = irecv[3];
1786     info->mallocs      = irecv[4];
1787   }
1788   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1789   info->fill_ratio_needed = 0;
1790   info->factor_mallocs    = 0;
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1795 {
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   switch (op) {
1801   case MAT_NEW_NONZERO_LOCATIONS:
1802   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1803   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1804   case MAT_KEEP_NONZERO_PATTERN:
1805   case MAT_NEW_NONZERO_LOCATION_ERR:
1806   case MAT_USE_INODES:
1807   case MAT_IGNORE_ZERO_ENTRIES:
1808     MatCheckPreallocated(A,1);
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_ROW_ORIENTED:
1813     MatCheckPreallocated(A,1);
1814     a->roworiented = flg;
1815 
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_NEW_DIAGONALS:
1820     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1821     break;
1822   case MAT_IGNORE_OFF_PROC_ENTRIES:
1823     a->donotstash = flg;
1824     break;
1825   case MAT_SPD:
1826     A->spd_set = PETSC_TRUE;
1827     A->spd     = flg;
1828     if (flg) {
1829       A->symmetric                  = PETSC_TRUE;
1830       A->structurally_symmetric     = PETSC_TRUE;
1831       A->symmetric_set              = PETSC_TRUE;
1832       A->structurally_symmetric_set = PETSC_TRUE;
1833     }
1834     break;
1835   case MAT_SYMMETRIC:
1836     MatCheckPreallocated(A,1);
1837     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1838     break;
1839   case MAT_STRUCTURALLY_SYMMETRIC:
1840     MatCheckPreallocated(A,1);
1841     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1842     break;
1843   case MAT_HERMITIAN:
1844     MatCheckPreallocated(A,1);
1845     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1846     break;
1847   case MAT_SYMMETRY_ETERNAL:
1848     MatCheckPreallocated(A,1);
1849     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1850     break;
1851   case MAT_SUBMAT_SINGLEIS:
1852     A->submat_singleis = flg;
1853     break;
1854   case MAT_STRUCTURE_ONLY:
1855     /* The option is handled directly by MatSetOption() */
1856     break;
1857   default:
1858     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1859   }
1860   PetscFunctionReturn(0);
1861 }
1862 
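/*
   Returns one locally owned row in global numbering by merging the row of the diagonal block A (columns
   shifted by cstart) with the row of the off-diagonal block B (columns mapped through garray), keeping
   the result sorted by increasing global column. Work arrays sized for the longest local row are
   allocated on first use and freed in the destroy routine.
*/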
1863 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1864 {
1865   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1866   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1867   PetscErrorCode ierr;
1868   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1869   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1870   PetscInt       *cmap,*idx_p;
1871 
1872   PetscFunctionBegin;
1873   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1874   mat->getrowactive = PETSC_TRUE;
1875 
1876   if (!mat->rowvalues && (idx || v)) {
1877     /*
1878         allocate enough space to hold information from the longest row.
1879     */
1880     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1881     PetscInt   max = 1,tmp;
1882     for (i=0; i<matin->rmap->n; i++) {
1883       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1884       if (max < tmp) max = tmp;
1885     }
1886     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1887   }
1888 
1889   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1890   lrow = row - rstart;
1891 
1892   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1893   if (!v)   {pvA = 0; pvB = 0;}
1894   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1895   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1896   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1897   nztot = nzA + nzB;
1898 
1899   cmap = mat->garray;
1900   if (v  || idx) {
1901     if (nztot) {
1902       /* Sort by increasing column numbers, assuming A and B already sorted */
1903       PetscInt imark = -1;
1904       if (v) {
1905         *v = v_p = mat->rowvalues;
1906         for (i=0; i<nzB; i++) {
1907           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1908           else break;
1909         }
1910         imark = i;
1911         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1912         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1913       }
1914       if (idx) {
1915         *idx = idx_p = mat->rowindices;
1916         if (imark > -1) {
1917           for (i=0; i<imark; i++) {
1918             idx_p[i] = cmap[cworkB[i]];
1919           }
1920         } else {
1921           for (i=0; i<nzB; i++) {
1922             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1923             else break;
1924           }
1925           imark = i;
1926         }
1927         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1928         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1929       }
1930     } else {
1931       if (idx) *idx = 0;
1932       if (v)   *v   = 0;
1933     }
1934   }
1935   *nz  = nztot;
1936   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1937   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1942 {
1943   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1944 
1945   PetscFunctionBegin;
1946   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1947   aij->getrowactive = PETSC_FALSE;
1948   PetscFunctionReturn(0);
1949 }
1950 
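/*
   Computes NORM_FROBENIUS, NORM_1 (maximum absolute column sum), or NORM_INFINITY (maximum absolute row
   sum) of a parallel AIJ matrix. Local contributions from the diagonal and off-diagonal blocks are
   combined with a single MPIU_Allreduce; the 2-norm is not supported.
*/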
1951 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1952 {
1953   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1954   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1955   PetscErrorCode ierr;
1956   PetscInt       i,j,cstart = mat->cmap->rstart;
1957   PetscReal      sum = 0.0;
1958   MatScalar      *v;
1959 
1960   PetscFunctionBegin;
1961   if (aij->size == 1) {
1962     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1963   } else {
1964     if (type == NORM_FROBENIUS) {
1965       v = amat->a;
1966       for (i=0; i<amat->nz; i++) {
1967         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1968       }
1969       v = bmat->a;
1970       for (i=0; i<bmat->nz; i++) {
1971         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1972       }
1973       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1974       *norm = PetscSqrtReal(*norm);
1975       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1976     } else if (type == NORM_1) { /* max column norm */
1977       PetscReal *tmp,*tmp2;
1978       PetscInt  *jj,*garray = aij->garray;
1979       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1980       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1981       *norm = 0.0;
1982       v     = amat->a; jj = amat->j;
1983       for (j=0; j<amat->nz; j++) {
1984         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1985       }
1986       v = bmat->a; jj = bmat->j;
1987       for (j=0; j<bmat->nz; j++) {
1988         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1989       }
1990       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1991       for (j=0; j<mat->cmap->N; j++) {
1992         if (tmp2[j] > *norm) *norm = tmp2[j];
1993       }
1994       ierr = PetscFree(tmp);CHKERRQ(ierr);
1995       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1996       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1997     } else if (type == NORM_INFINITY) { /* max row norm */
1998       PetscReal ntemp = 0.0;
1999       for (j=0; j<aij->A->rmap->n; j++) {
2000         v   = amat->a + amat->i[j];
2001         sum = 0.0;
2002         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2003           sum += PetscAbsScalar(*v); v++;
2004         }
2005         v = bmat->a + bmat->i[j];
2006         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2007           sum += PetscAbsScalar(*v); v++;
2008         }
2009         if (sum > ntemp) ntemp = sum;
2010       }
2011       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2012       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2013     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2014   }
2015   PetscFunctionReturn(0);
2016 }
2017 
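/*
   Forms the transpose of a parallel AIJ matrix. Preallocation for the result counts the diagonal-block
   column occurrences locally and reduces the off-diagonal counts onto the owning processes with a
   PetscSF; the diagonal block is then transposed locally with MatTranspose, while the off-diagonal
   entries are inserted with MatSetValues using global row and column indices.
*/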
2018 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2019 {
2020   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2021   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2022   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2023   PetscErrorCode ierr;
2024   Mat            B,A_diag,*B_diag;
2025   MatScalar      *array;
2026 
2027   PetscFunctionBegin;
2028   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2029   ai = Aloc->i; aj = Aloc->j;
2030   bi = Bloc->i; bj = Bloc->j;
2031   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2032     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2033     PetscSFNode          *oloc;
2034     PETSC_UNUSED PetscSF sf;
2035 
2036     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2037     /* compute d_nnz for preallocation */
2038     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2039     for (i=0; i<ai[ma]; i++) {
2040       d_nnz[aj[i]]++;
2041     }
2042     /* compute local off-diagonal contributions */
2043     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2044     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2045     /* map those to global */
2046     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2047     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2048     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2049     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2050     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2051     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2052     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2053 
2054     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2055     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2056     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2057     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2058     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2059     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2060   } else {
2061     B    = *matout;
2062     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2063   }
2064 
2065   b           = (Mat_MPIAIJ*)B->data;
2066   A_diag      = a->A;
2067   B_diag      = &b->A;
2068   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2069   A_diag_ncol = A_diag->cmap->N;
2070   B_diag_ilen = sub_B_diag->ilen;
2071   B_diag_i    = sub_B_diag->i;
2072 
2073   /* Set ilen for diagonal of B */
2074   for (i=0; i<A_diag_ncol; i++) {
2075     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2076   }
2077 
2078   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2079      very quickly (i.e., without using MatSetValues), because all writes are local. */
2080   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2081 
2082   /* copy over the B part */
2083   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2084   array = Bloc->a;
2085   row   = A->rmap->rstart;
2086   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2087   cols_tmp = cols;
2088   for (i=0; i<mb; i++) {
2089     ncol = bi[i+1]-bi[i];
2090     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2091     row++;
2092     array += ncol; cols_tmp += ncol;
2093   }
2094   ierr = PetscFree(cols);CHKERRQ(ierr);
2095 
2096   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2097   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2098   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2099     *matout = B;
2100   } else {
2101     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2102   }
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2107 {
2108   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2109   Mat            a    = aij->A,b = aij->B;
2110   PetscErrorCode ierr;
2111   PetscInt       s1,s2,s3;
2112 
2113   PetscFunctionBegin;
2114   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2115   if (rr) {
2116     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2117     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2118     /* Overlap communication with computation. */
2119     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2120   }
2121   if (ll) {
2122     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2123     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2124     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2125   }
2126   /* scale  the diagonal block */
2127   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2128 
2129   if (rr) {
2130     /* Do a scatter end and then right scale the off-diagonal block */
2131     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2132     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2133   }
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2140   PetscErrorCode ierr;
2141 
2142   PetscFunctionBegin;
2143   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2148 {
2149   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2150   Mat            a,b,c,d;
2151   PetscBool      flg;
2152   PetscErrorCode ierr;
2153 
2154   PetscFunctionBegin;
2155   a = matA->A; b = matA->B;
2156   c = matB->A; d = matB->B;
2157 
2158   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2159   if (flg) {
2160     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2161   }
2162   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2167 {
2168   PetscErrorCode ierr;
2169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2170   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2171 
2172   PetscFunctionBegin;
2173   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2174   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2175     /* because of the column compression in the off-processor part of the matrix a->B,
2176        the number of columns in a->B and b->B may be different, hence we cannot call
2177        the MatCopy() directly on the two parts. If need be, we can provide a more
2178        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2179        then copying the submatrices */
2180     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2181   } else {
2182     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2183     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2184   }
2185   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2190 {
2191   PetscErrorCode ierr;
2192 
2193   PetscFunctionBegin;
2194   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 /*
2199    Computes the number of nonzeros per row needed for preallocation when X and Y
2200    have different nonzero structure.
2201 */
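/*
   For example (an illustrative sketch): if row i of X has global columns {1,4} and row i of Y has
   global columns {2,4,7}, the merge below yields nnz[i] = 4 for the union {1,2,4,7}, counting the
   shared column 4 only once.
*/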
2202 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2203 {
2204   PetscInt       i,j,k,nzx,nzy;
2205 
2206   PetscFunctionBegin;
2207   /* Set the number of nonzeros in the new matrix */
2208   for (i=0; i<m; i++) {
2209     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2210     nzx = xi[i+1] - xi[i];
2211     nzy = yi[i+1] - yi[i];
2212     nnz[i] = 0;
2213     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2214       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2215       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2216       nnz[i]++;
2217     }
2218     for (; k<nzy; k++) nnz[i]++;
2219   }
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2224 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2225 {
2226   PetscErrorCode ierr;
2227   PetscInt       m = Y->rmap->N;
2228   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2229   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2230 
2231   PetscFunctionBegin;
2232   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2233   PetscFunctionReturn(0);
2234 }
2235 
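/*
   Y = Y + a*X. With SAME_NONZERO_PATTERN the value arrays of the diagonal and off-diagonal blocks are
   combined directly with BLAS axpy; with SUBSET_NONZERO_PATTERN the generic MatAXPY_Basic() is used;
   otherwise a new matrix with the merged nonzero pattern is preallocated and Y is replaced by it.
*/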
2236 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2237 {
2238   PetscErrorCode ierr;
2239   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2240   PetscBLASInt   bnz,one=1;
2241   Mat_SeqAIJ     *x,*y;
2242 
2243   PetscFunctionBegin;
2244   if (str == SAME_NONZERO_PATTERN) {
2245     PetscScalar alpha = a;
2246     x    = (Mat_SeqAIJ*)xx->A->data;
2247     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2248     y    = (Mat_SeqAIJ*)yy->A->data;
2249     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2250     x    = (Mat_SeqAIJ*)xx->B->data;
2251     y    = (Mat_SeqAIJ*)yy->B->data;
2252     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2253     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2254     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2255   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
2256     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2257   } else {
2258     Mat      B;
2259     PetscInt *nnz_d,*nnz_o;
2260     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2261     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2262     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2263     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2264     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2265     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2266     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2267     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2268     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2269     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2270     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2271     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2272     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2273     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2274   }
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2279 
2280 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2281 {
2282 #if defined(PETSC_USE_COMPLEX)
2283   PetscErrorCode ierr;
2284   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2285 
2286   PetscFunctionBegin;
2287   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2288   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2289 #else
2290   PetscFunctionBegin;
2291 #endif
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2296 {
2297   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2298   PetscErrorCode ierr;
2299 
2300   PetscFunctionBegin;
2301   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2302   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2307 {
2308   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2309   PetscErrorCode ierr;
2310 
2311   PetscFunctionBegin;
2312   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2313   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
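/*
   Largest entry in absolute value in each local row: the maxima of the diagonal and off-diagonal blocks
   are computed separately and combined, with off-diagonal column indices translated to global numbering
   through garray.
*/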
2317 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2318 {
2319   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2320   PetscErrorCode ierr;
2321   PetscInt       i,*idxb = 0;
2322   PetscScalar    *va,*vb;
2323   Vec            vtmp;
2324 
2325   PetscFunctionBegin;
2326   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2327   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2328   if (idx) {
2329     for (i=0; i<A->rmap->n; i++) {
2330       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2331     }
2332   }
2333 
2334   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2335   if (idx) {
2336     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2337   }
2338   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2339   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2340 
2341   for (i=0; i<A->rmap->n; i++) {
2342     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2343       va[i] = vb[i];
2344       if (idx) idx[i] = a->garray[idxb[i]];
2345     }
2346   }
2347 
2348   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2349   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2350   ierr = PetscFree(idxb);CHKERRQ(ierr);
2351   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2356 {
2357   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2358   PetscErrorCode ierr;
2359   PetscInt       i,*idxb = 0;
2360   PetscScalar    *va,*vb;
2361   Vec            vtmp;
2362 
2363   PetscFunctionBegin;
2364   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2365   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2366   if (idx) {
2367     for (i=0; i<A->rmap->n; i++) {  /* va and idx have one entry per local row */
2368       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2369     }
2370   }
2371 
2372   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2373   if (idx) {
2374     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2375   }
2376   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2377   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2378 
2379   for (i=0; i<A->rmap->n; i++) {
2380     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2381       va[i] = vb[i];
2382       if (idx) idx[i] = a->garray[idxb[i]];
2383     }
2384   }
2385 
2386   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2387   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2388   ierr = PetscFree(idxb);CHKERRQ(ierr);
2389   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2396   PetscInt       n      = A->rmap->n;
2397   PetscInt       cstart = A->cmap->rstart;
2398   PetscInt       *cmap  = mat->garray;
2399   PetscInt       *diagIdx, *offdiagIdx;
2400   Vec            diagV, offdiagV;
2401   PetscScalar    *a, *diagA, *offdiagA;
2402   PetscInt       r;
2403   PetscErrorCode ierr;
2404 
2405   PetscFunctionBegin;
2406   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2407   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2408   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2409   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2410   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2411   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2412   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2413   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2414   for (r = 0; r < n; ++r) {
2415     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2416       a[r]   = diagA[r];
2417       idx[r] = cstart + diagIdx[r];
2418     } else {
2419       a[r]   = offdiagA[r];
2420       idx[r] = cmap[offdiagIdx[r]];
2421     }
2422   }
2423   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2424   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2426   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2427   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2428   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2435   PetscInt       n      = A->rmap->n;
2436   PetscInt       cstart = A->cmap->rstart;
2437   PetscInt       *cmap  = mat->garray;
2438   PetscInt       *diagIdx, *offdiagIdx;
2439   Vec            diagV, offdiagV;
2440   PetscScalar    *a, *diagA, *offdiagA;
2441   PetscInt       r;
2442   PetscErrorCode ierr;
2443 
2444   PetscFunctionBegin;
2445   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2447   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2448   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2449   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2450   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2451   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2452   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2453   for (r = 0; r < n; ++r) {
2454     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2455       a[r]   = diagA[r];
2456       idx[r] = cstart + diagIdx[r];
2457     } else {
2458       a[r]   = offdiagA[r];
2459       idx[r] = cmap[offdiagIdx[r]];
2460     }
2461   }
2462   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2464   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2465   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2466   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2467   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2472 {
2473   PetscErrorCode ierr;
2474   Mat            *dummy;
2475 
2476   PetscFunctionBegin;
2477   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2478   *newmat = *dummy;
2479   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2484 {
2485   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2486   PetscErrorCode ierr;
2487 
2488   PetscFunctionBegin;
2489   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2490   A->factorerrortype = a->A->factorerrortype;
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2495 {
2496   PetscErrorCode ierr;
2497   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2498 
2499   PetscFunctionBegin;
2500   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2501   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2502   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2503   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2508 {
2509   PetscFunctionBegin;
2510   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2511   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 /*@
2516    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2517 
2518    Collective on Mat
2519 
2520    Input Parameters:
2521 +    A - the matrix
2522 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2523 
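   Example Usage:
   A minimal sketch (assuming A is an assembled MATMPIAIJ matrix and nis, is[], ov are the usual
   MatIncreaseOverlap() arguments); the same choice can also be made at runtime with the option
   -mat_increase_overlap_scalable:
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);  /* subsequent overlap computations use the scalable algorithm */
.ve
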
2524  Level: advanced
2525 
2526 @*/
2527 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2528 {
2529   PetscErrorCode       ierr;
2530 
2531   PetscFunctionBegin;
2532   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2537 {
2538   PetscErrorCode       ierr;
2539   PetscBool            sc = PETSC_FALSE,flg;
2540 
2541   PetscFunctionBegin;
2542   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2543   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2544   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2545   if (flg) {
2546     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2547   }
2548   ierr = PetscOptionsTail();CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2553 {
2554   PetscErrorCode ierr;
2555   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2556   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2557 
2558   PetscFunctionBegin;
2559   if (!Y->preallocated) {
2560     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2561   } else if (!aij->nz) {
2562     PetscInt nonew = aij->nonew;
2563     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2564     aij->nonew = nonew;
2565   }
2566   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2571 {
2572   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2573   PetscErrorCode ierr;
2574 
2575   PetscFunctionBegin;
2576   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2577   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2578   if (d) {
2579     PetscInt rstart;
2580     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2581     *d += rstart;
2582 
2583   }
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2588 {
2589   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2590   PetscErrorCode ierr;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 /* -------------------------------------------------------------------*/
2598 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2599                                        MatGetRow_MPIAIJ,
2600                                        MatRestoreRow_MPIAIJ,
2601                                        MatMult_MPIAIJ,
2602                                 /* 4*/ MatMultAdd_MPIAIJ,
2603                                        MatMultTranspose_MPIAIJ,
2604                                        MatMultTransposeAdd_MPIAIJ,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                 /*10*/ 0,
2609                                        0,
2610                                        0,
2611                                        MatSOR_MPIAIJ,
2612                                        MatTranspose_MPIAIJ,
2613                                 /*15*/ MatGetInfo_MPIAIJ,
2614                                        MatEqual_MPIAIJ,
2615                                        MatGetDiagonal_MPIAIJ,
2616                                        MatDiagonalScale_MPIAIJ,
2617                                        MatNorm_MPIAIJ,
2618                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2619                                        MatAssemblyEnd_MPIAIJ,
2620                                        MatSetOption_MPIAIJ,
2621                                        MatZeroEntries_MPIAIJ,
2622                                 /*24*/ MatZeroRows_MPIAIJ,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                 /*29*/ MatSetUp_MPIAIJ,
2628                                        0,
2629                                        0,
2630                                        MatGetDiagonalBlock_MPIAIJ,
2631                                        0,
2632                                 /*34*/ MatDuplicate_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                 /*39*/ MatAXPY_MPIAIJ,
2638                                        MatCreateSubMatrices_MPIAIJ,
2639                                        MatIncreaseOverlap_MPIAIJ,
2640                                        MatGetValues_MPIAIJ,
2641                                        MatCopy_MPIAIJ,
2642                                 /*44*/ MatGetRowMax_MPIAIJ,
2643                                        MatScale_MPIAIJ,
2644                                        MatShift_MPIAIJ,
2645                                        MatDiagonalSet_MPIAIJ,
2646                                        MatZeroRowsColumns_MPIAIJ,
2647                                 /*49*/ MatSetRandom_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2653                                        0,
2654                                        MatSetUnfactored_MPIAIJ,
2655                                        MatPermute_MPIAIJ,
2656                                        0,
2657                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2658                                        MatDestroy_MPIAIJ,
2659                                        MatView_MPIAIJ,
2660                                        0,
2661                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2662                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2663                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2668                                        MatGetRowMinAbs_MPIAIJ,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                 /*75*/ MatFDColoringApply_AIJ,
2674                                        MatSetFromOptions_MPIAIJ,
2675                                        0,
2676                                        0,
2677                                        MatFindZeroDiagonals_MPIAIJ,
2678                                 /*80*/ 0,
2679                                        0,
2680                                        0,
2681                                 /*83*/ MatLoad_MPIAIJ,
2682                                        MatIsSymmetric_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2688                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2689                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2690                                        MatPtAP_MPIAIJ_MPIAIJ,
2691                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2692                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                 /*99*/ 0,
2698                                        0,
2699                                        0,
2700                                        MatConjugate_MPIAIJ,
2701                                        0,
2702                                 /*104*/MatSetValuesRow_MPIAIJ,
2703                                        MatRealPart_MPIAIJ,
2704                                        MatImaginaryPart_MPIAIJ,
2705                                        0,
2706                                        0,
2707                                 /*109*/0,
2708                                        0,
2709                                        MatGetRowMin_MPIAIJ,
2710                                        0,
2711                                        MatMissingDiagonal_MPIAIJ,
2712                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2713                                        0,
2714                                        MatGetGhosts_MPIAIJ,
2715                                        0,
2716                                        0,
2717                                 /*119*/0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        MatGetMultiProcBlock_MPIAIJ,
2722                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2723                                        MatGetColumnNorms_MPIAIJ,
2724                                        MatInvertBlockDiagonal_MPIAIJ,
2725                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2726                                        MatCreateSubMatricesMPI_MPIAIJ,
2727                                 /*129*/0,
2728                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2729                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2730                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2731                                        0,
2732                                 /*134*/0,
2733                                        0,
2734                                        MatRARt_MPIAIJ_MPIAIJ,
2735                                        0,
2736                                        0,
2737                                 /*139*/MatSetBlockSizes_MPIAIJ,
2738                                        0,
2739                                        0,
2740                                        MatFDColoringSetUp_MPIXAIJ,
2741                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2742                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2743 };
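
/*
   Note (added for exposition): the table above is the virtual function table installed for the
   MPIAIJ type; generic Mat routines dispatch through it, so for example a user-level call

     ierr = MatMult(A,x,y);CHKERRQ(ierr);

   ends up invoking (*A->ops->mult)(A,x,y), i.e. MatMult_MPIAIJ() for this type.  Zero entries
   mark operations that are not implemented for MATMPIAIJ.
*/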
2744 
2745 /* ----------------------------------------------------------------------------------------*/
2746 
2747 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2748 {
2749   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2750   PetscErrorCode ierr;
2751 
2752   PetscFunctionBegin;
2753   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2754   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2755   PetscFunctionReturn(0);
2756 }
2757 
2758 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2759 {
2760   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2761   PetscErrorCode ierr;
2762 
2763   PetscFunctionBegin;
2764   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2765   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
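
/*
   Example (a common usage pattern shown as a sketch; A is a placeholder for an assembled
   MPIAIJ matrix whose nonzero pattern is reused across solves): the values are saved once and
   restored before each refill, which requires new nonzero locations to be disallowed.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/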
2768 
2769 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2770 {
2771   Mat_MPIAIJ     *b;
2772   PetscErrorCode ierr;
2773 
2774   PetscFunctionBegin;
2775   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2776   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2777   b = (Mat_MPIAIJ*)B->data;
2778 
2779 #if defined(PETSC_USE_CTABLE)
2780   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2781 #else
2782   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2783 #endif
2784   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2785   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2786   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2787 
2788   /* Because the B matrix will have been resized we simply destroy it and create a new one each time */
2789   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2790   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2791   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2792   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2793   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2794   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2795 
2796   if (!B->preallocated) {
2797     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2798     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2799     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2800     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2801     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2802   }
2803 
2804   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2805   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2806   B->preallocated  = PETSC_TRUE;
2807   B->was_assembled = PETSC_FALSE;
2808   B->assembled     = PETSC_FALSE;
2809   PetscFunctionReturn(0);
2810 }
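
/*
   Example (an illustrative sketch; the global sizes M, N and the per-row estimates are
   placeholders): a typical user-level setup that reaches the routine above through
   MatMPIAIJSetPreallocation().

     Mat A;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Here 5 is the estimated number of nonzeros per row in the diagonal block and 2 the estimate
   for the off-diagonal block; exact per-row counts may be supplied through d_nnz and o_nnz
   instead.
*/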
2811 
2812 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2813 {
2814   Mat_MPIAIJ     *b;
2815   PetscErrorCode ierr;
2816 
2817   PetscFunctionBegin;
2818   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2819   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2821   b = (Mat_MPIAIJ*)B->data;
2822 
2823 #if defined(PETSC_USE_CTABLE)
2824   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2825 #else
2826   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2827 #endif
2828   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2829   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2830   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2831 
2832   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2833   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2834   B->preallocated  = PETSC_TRUE;
2835   B->was_assembled = PETSC_FALSE;
2836   B->assembled = PETSC_FALSE;
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2841 {
2842   Mat            mat;
2843   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2844   PetscErrorCode ierr;
2845 
2846   PetscFunctionBegin;
2847   *newmat = 0;
2848   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2849   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2850   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2851   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2852   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2853   a       = (Mat_MPIAIJ*)mat->data;
2854 
2855   mat->factortype   = matin->factortype;
2856   mat->assembled    = PETSC_TRUE;
2857   mat->insertmode   = NOT_SET_VALUES;
2858   mat->preallocated = PETSC_TRUE;
2859 
2860   a->size         = oldmat->size;
2861   a->rank         = oldmat->rank;
2862   a->donotstash   = oldmat->donotstash;
2863   a->roworiented  = oldmat->roworiented;
2864   a->rowindices   = 0;
2865   a->rowvalues    = 0;
2866   a->getrowactive = PETSC_FALSE;
2867 
2868   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2869   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2870 
2871   if (oldmat->colmap) {
2872 #if defined(PETSC_USE_CTABLE)
2873     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2874 #else
2875     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2876     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2877     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2878 #endif
2879   } else a->colmap = 0;
2880   if (oldmat->garray) {
2881     PetscInt len;
2882     len  = oldmat->B->cmap->n;
2883     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2884     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2885     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2886   } else a->garray = 0;
2887 
2888   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2889   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2890   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2891   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2892 
2893   if (oldmat->Mvctx_mpi1) {
2894     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2895     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2896   }
2897 
2898   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2899   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2900   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2901   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2902   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2903   *newmat = mat;
2904   PetscFunctionReturn(0);
2905 }
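
/*
   Example (a minimal sketch): duplicating an assembled MPIAIJ matrix; MAT_DO_NOT_COPY_VALUES
   may be used instead to copy only the nonzero structure.

     Mat B;

     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/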
2906 
2907 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2908 {
2909   PetscScalar    *vals,*svals;
2910   MPI_Comm       comm;
2911   PetscErrorCode ierr;
2912   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2913   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2914   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2915   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2916   PetscInt       cend,cstart,n,*rowners;
2917   int            fd;
2918   PetscInt       bs = newMat->rmap->bs;
2919 
2920   PetscFunctionBegin;
2921   /* force binary viewer to load .info file if it has not yet done so */
2922   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2923   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2924   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2925   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2926   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2927   if (!rank) {
2928     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2929     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2930     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2931   }
2932 
2933   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2934   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2935   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2936   if (bs < 0) bs = 1;
2937 
2938   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2939   M    = header[1]; N = header[2];
2940 
2941   /* If global sizes are set, check if they are consistent with that given in the file */
2942   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2943   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2944 
2945   /* determine ownership of all (block) rows */
2946   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2947   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2948   else m = newMat->rmap->n; /* Set by user */
2949 
2950   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2951   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2952 
2953   /* First process needs enough room for process with most rows */
2954   if (!rank) {
2955     mmax = rowners[1];
2956     for (i=2; i<=size; i++) {
2957       mmax = PetscMax(mmax, rowners[i]);
2958     }
2959   } else mmax = -1;             /* unused, but compilers complain */
2960 
2961   rowners[0] = 0;
2962   for (i=2; i<=size; i++) {
2963     rowners[i] += rowners[i-1];
2964   }
2965   rstart = rowners[rank];
2966   rend   = rowners[rank+1];
2967 
2968   /* distribute row lengths to all processors */
2969   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2970   if (!rank) {
2971     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2972     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2973     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2974     for (j=0; j<m; j++) {
2975       procsnz[0] += ourlens[j];
2976     }
2977     for (i=1; i<size; i++) {
2978       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2979       /* calculate the number of nonzeros on each processor */
2980       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2981         procsnz[i] += rowlengths[j];
2982       }
2983       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2984     }
2985     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2986   } else {
2987     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2988   }
2989 
2990   if (!rank) {
2991     /* determine max buffer needed and allocate it */
2992     maxnz = 0;
2993     for (i=0; i<size; i++) {
2994       maxnz = PetscMax(maxnz,procsnz[i]);
2995     }
2996     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2997 
2998     /* read in my part of the matrix column indices  */
2999     nz   = procsnz[0];
3000     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3001     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3002 
3003     /* read in everyone else's and ship off */
3004     for (i=1; i<size; i++) {
3005       nz   = procsnz[i];
3006       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3007       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3008     }
3009     ierr = PetscFree(cols);CHKERRQ(ierr);
3010   } else {
3011     /* determine buffer space needed for message */
3012     nz = 0;
3013     for (i=0; i<m; i++) {
3014       nz += ourlens[i];
3015     }
3016     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3017 
3018     /* receive message of column indices */
3019     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3020   }
3021 
3022   /* determine column ownership if matrix is not square */
3023   if (N != M) {
3024     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3025     else n = newMat->cmap->n;
3026     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3027     cstart = cend - n;
3028   } else {
3029     cstart = rstart;
3030     cend   = rend;
3031     n      = cend - cstart;
3032   }
3033 
3034   /* loop over local rows, determining number of off diagonal entries */
3035   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3036   jj   = 0;
3037   for (i=0; i<m; i++) {
3038     for (j=0; j<ourlens[i]; j++) {
3039       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3040       jj++;
3041     }
3042   }
3043 
3044   for (i=0; i<m; i++) {
3045     ourlens[i] -= offlens[i];
3046   }
3047   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3048 
3049   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3050 
3051   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3052 
3053   for (i=0; i<m; i++) {
3054     ourlens[i] += offlens[i];
3055   }
3056 
3057   if (!rank) {
3058     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3059 
3060     /* read in my part of the matrix numerical values  */
3061     nz   = procsnz[0];
3062     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3063 
3064     /* insert into matrix */
3065     jj      = rstart;
3066     smycols = mycols;
3067     svals   = vals;
3068     for (i=0; i<m; i++) {
3069       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3070       smycols += ourlens[i];
3071       svals   += ourlens[i];
3072       jj++;
3073     }
3074 
3075     /* read in other processors and ship out */
3076     for (i=1; i<size; i++) {
3077       nz   = procsnz[i];
3078       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3079       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3080     }
3081     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3082   } else {
3083     /* receive numeric values */
3084     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3085 
3086     /* receive message of values*/
3087     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3088 
3089     /* insert into matrix */
3090     jj      = rstart;
3091     smycols = mycols;
3092     svals   = vals;
3093     for (i=0; i<m; i++) {
3094       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3095       smycols += ourlens[i];
3096       svals   += ourlens[i];
3097       jj++;
3098     }
3099   }
3100   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3101   ierr = PetscFree(vals);CHKERRQ(ierr);
3102   ierr = PetscFree(mycols);CHKERRQ(ierr);
3103   ierr = PetscFree(rowners);CHKERRQ(ierr);
3104   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3105   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3106   PetscFunctionReturn(0);
3107 }
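
/*
   Example (a minimal sketch; "matrix.dat" is a placeholder file name): loading a matrix stored
   in PETSc binary format, which dispatches to the routine above when the type is MATMPIAIJ.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/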
3108 
3109 /* Not scalable because of ISAllGather() unless getting all columns. */
3110 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3111 {
3112   PetscErrorCode ierr;
3113   IS             iscol_local;
3114   PetscBool      isstride;
3115   PetscMPIInt    lisstride=0,gisstride;
3116 
3117   PetscFunctionBegin;
3118   /* check if we are grabbing all columns */
3119   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3120 
3121   if (isstride) {
3122     PetscInt  start,len,mstart,mlen;
3123     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3124     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3125     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3126     if (mstart == start && mlen-mstart == len) lisstride = 1;
3127   }
3128 
3129   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3130   if (gisstride) {
3131     PetscInt N;
3132     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3133     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3134     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3135     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3136   } else {
3137     PetscInt cbs;
3138     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3139     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3140     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3141   }
3142 
3143   *isseq = iscol_local;
3144   PetscFunctionReturn(0);
3145 }
3146 
3147 /*
3148  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3149  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3150 
3151  Input Parameters:
3152    mat - matrix
3153    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3154            i.e., mat->rstart <= isrow[i] < mat->rend
3155    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3156            i.e., mat->cstart <= iscol[i] < mat->cend
3157  Output Parameters:
3158    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3159    iscol_o - sequential column index set for retrieving mat->B
3160    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3161  */
3162 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3163 {
3164   PetscErrorCode ierr;
3165   Vec            x,cmap;
3166   const PetscInt *is_idx;
3167   PetscScalar    *xarray,*cmaparray;
3168   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3169   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3170   Mat            B=a->B;
3171   Vec            lvec=a->lvec,lcmap;
3172   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3173   MPI_Comm       comm;
3174   VecScatter     Mvctx=a->Mvctx;
3175 
3176   PetscFunctionBegin;
3177   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3178   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3179 
3180   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3181   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3182   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3183   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3184   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3185 
3186   /* Get start indices */
3187   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3188   isstart -= ncols;
3189   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3190 
3191   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3192   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3193   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3194   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3195   for (i=0; i<ncols; i++) {
3196     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3197     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3198     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3199   }
3200   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3201   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3202   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3203 
3204   /* Get iscol_d */
3205   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3206   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3207   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3208 
3209   /* Get isrow_d */
3210   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3211   rstart = mat->rmap->rstart;
3212   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3213   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3214   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3215   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3216 
3217   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3218   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3219   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3220 
3221   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3222   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3223   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3224 
3225   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3226 
3227   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3228   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3229 
3230   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3231   /* off-process column indices */
3232   count = 0;
3233   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3234   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3235 
3236   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3237   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3238   for (i=0; i<Bn; i++) {
3239     if (PetscRealPart(xarray[i]) > -1.0) {
3240       idx[count]     = i;                   /* local column index in off-diagonal part B */
3241       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3242       count++;
3243     }
3244   }
3245   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3246   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3247 
3248   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3249   /* cannot ensure iscol_o has same blocksize as iscol! */
3250 
3251   ierr = PetscFree(idx);CHKERRQ(ierr);
3252   *garray = cmap1;
3253 
3254   ierr = VecDestroy(&x);CHKERRQ(ierr);
3255   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3256   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3257   PetscFunctionReturn(0);
3258 }
3259 
3260 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3261 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3262 {
3263   PetscErrorCode ierr;
3264   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3265   Mat            M = NULL;
3266   MPI_Comm       comm;
3267   IS             iscol_d,isrow_d,iscol_o;
3268   Mat            Asub = NULL,Bsub = NULL;
3269   PetscInt       n;
3270 
3271   PetscFunctionBegin;
3272   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3273 
3274   if (call == MAT_REUSE_MATRIX) {
3275     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3276     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3277     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3278 
3279     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3280     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3281 
3282     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3283     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3284 
3285     /* Update diagonal and off-diagonal portions of submat */
3286     asub = (Mat_MPIAIJ*)(*submat)->data;
3287     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3288     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3289     if (n) {
3290       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3291     }
3292     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3293     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3294 
3295   } else { /* call == MAT_INITIAL_MATRIX */
3296     const PetscInt *garray;
3297     PetscInt        BsubN;
3298 
3299     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3300     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3301 
3302     /* Create local submatrices Asub and Bsub */
3303     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3304     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3305 
3306     /* Create submatrix M */
3307     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3308 
3309     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3310     asub = (Mat_MPIAIJ*)M->data;
3311 
3312     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3313     n = asub->B->cmap->N;
3314     if (BsubN > n) {
3315       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3316       const PetscInt *idx;
3317       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3318       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3319 
3320       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3321       j = 0;
3322       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3323       for (i=0; i<n; i++) {
3324         if (j >= BsubN) break;
3325         while (subgarray[i] > garray[j]) j++;
3326 
3327         if (subgarray[i] == garray[j]) {
3328           idx_new[i] = idx[j++];
3329         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3330       }
3331       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3332 
3333       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3334       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3335 
3336     } else if (BsubN < n) {
3337       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3338     }
3339 
3340     ierr = PetscFree(garray);CHKERRQ(ierr);
3341     *submat = M;
3342 
3343     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3344     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3345     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3346 
3347     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3348     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3349 
3350     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3351     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3352   }
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3357 {
3358   PetscErrorCode ierr;
3359   IS             iscol_local=NULL,isrow_d;
3360   PetscInt       csize;
3361   PetscInt       n,i,j,start,end;
3362   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3363   MPI_Comm       comm;
3364 
3365   PetscFunctionBegin;
3366   /* If isrow has same processor distribution as mat,
3367      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3368   if (call == MAT_REUSE_MATRIX) {
3369     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3370     if (isrow_d) {
3371       sameRowDist  = PETSC_TRUE;
3372       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3373     } else {
3374       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3375       if (iscol_local) {
3376         sameRowDist  = PETSC_TRUE;
3377         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3378       }
3379     }
3380   } else {
3381     /* Check if isrow has same processor distribution as mat */
3382     sameDist[0] = PETSC_FALSE;
3383     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3384     if (!n) {
3385       sameDist[0] = PETSC_TRUE;
3386     } else {
3387       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3388       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3389       if (i >= start && j < end) {
3390         sameDist[0] = PETSC_TRUE;
3391       }
3392     }
3393 
3394     /* Check if iscol has same processor distribution as mat */
3395     sameDist[1] = PETSC_FALSE;
3396     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3397     if (!n) {
3398       sameDist[1] = PETSC_TRUE;
3399     } else {
3400       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3401       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3402       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3403     }
3404 
3405     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3406     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3407     sameRowDist = tsameDist[0];
3408   }
3409 
3410   if (sameRowDist) {
3411     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3412       /* isrow and iscol have same processor distribution as mat */
3413       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3414       PetscFunctionReturn(0);
3415     } else { /* sameRowDist */
3416       /* isrow has same processor distribution as mat */
3417       if (call == MAT_INITIAL_MATRIX) {
3418         PetscBool sorted;
3419         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3420         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3421         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3422         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3423 
3424         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3425         if (sorted) {
3426           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3427           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3428           PetscFunctionReturn(0);
3429         }
3430       } else { /* call == MAT_REUSE_MATRIX */
3431         IS    iscol_sub;
3432         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3433         if (iscol_sub) {
3434           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3435           PetscFunctionReturn(0);
3436         }
3437       }
3438     }
3439   }
3440 
3441   /* General case: iscol -> iscol_local which has global size of iscol */
3442   if (call == MAT_REUSE_MATRIX) {
3443     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3444     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3445   } else {
3446     if (!iscol_local) {
3447       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3448     }
3449   }
3450 
3451   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3452   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3453 
3454   if (call == MAT_INITIAL_MATRIX) {
3455     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3456     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3457   }
3458   PetscFunctionReturn(0);
3459 }
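
/*
   Example (an illustrative sketch; isrow and iscol are placeholder index sets): extracting a
   parallel submatrix through the public MatCreateSubMatrix() interface, which dispatches to
   the routine above and selects one of the specialized paths depending on how isrow and iscol
   are distributed.

     Mat sub;

     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/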
3460 
3461 /*@C
3462      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3463          and "off-diagonal" part of the matrix in CSR format.
3464 
3465    Collective on MPI_Comm
3466 
3467    Input Parameters:
3468 +  comm - MPI communicator
3469 .  A - "diagonal" portion of matrix
3470 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3471 -  garray - global index of B columns
3472 
3473    Output Parameter:
3474 .   mat - the matrix, with input A as its local diagonal matrix

3475    Level: advanced
3476 
3477    Notes:
3478        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3479        A becomes part of the output mat, and B is destroyed by this routine. The user cannot use A or B afterwards.
3480 
3481 .seealso: MatCreateMPIAIJWithSplitArrays()
3482 @*/
3483 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3484 {
3485   PetscErrorCode ierr;
3486   Mat_MPIAIJ     *maij;
3487   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3488   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3489   PetscScalar    *oa=b->a;
3490   Mat            Bnew;
3491   PetscInt       m,n,N;
3492 
3493   PetscFunctionBegin;
3494   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3495   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3496   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3497   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3498   /* the check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3499   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3500 
3501   /* Get global columns of mat */
3502   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3503 
3504   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3505   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3506   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3507   maij = (Mat_MPIAIJ*)(*mat)->data;
3508 
3509   (*mat)->preallocated = PETSC_TRUE;
3510 
3511   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3512   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3513 
3514   /* Set A as diagonal portion of *mat */
3515   maij->A = A;
3516 
3517   nz = oi[m];
3518   for (i=0; i<nz; i++) {
3519     col   = oj[i];
3520     oj[i] = garray[col];
3521   }
3522 
3523    /* Set Bnew as off-diagonal portion of *mat */
3524   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3525   bnew        = (Mat_SeqAIJ*)Bnew->data;
3526   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3527   maij->B     = Bnew;
3528 
3529   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3530 
3531   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3532   b->free_a       = PETSC_FALSE;
3533   b->free_ij      = PETSC_FALSE;
3534   ierr = MatDestroy(&B);CHKERRQ(ierr);
3535 
3536   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3537   bnew->free_a       = PETSC_TRUE;
3538   bnew->free_ij      = PETSC_TRUE;
3539 
3540   /* condense columns of maij->B */
3541   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3542   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3543   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3544   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3545   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3546   PetscFunctionReturn(0);
3547 }
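
/*
   Usage note (a sketch under assumed setup; Asub, Bsub and garray are placeholders): Asub and
   Bsub are sequential AIJ matrices already built on each process, and garray maps the columns
   of Bsub to global column indices; after the call both become part of (or are destroyed by)
   the new parallel matrix and must not be used by the caller.

     Mat M;

     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
*/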
3548 
3549 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3550 
3551 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3552 {
3553   PetscErrorCode ierr;
3554   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3555   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3556   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3557   Mat            M,Msub,B=a->B;
3558   MatScalar      *aa;
3559   Mat_SeqAIJ     *aij;
3560   PetscInt       *garray = a->garray,*colsub,Ncols;
3561   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3562   IS             iscol_sub,iscmap;
3563   const PetscInt *is_idx,*cmap;
3564   PetscBool      allcolumns=PETSC_FALSE;
3565   MPI_Comm       comm;
3566 
3567   PetscFunctionBegin;
3568   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3569 
3570   if (call == MAT_REUSE_MATRIX) {
3571     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3572     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3573     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3574 
3575     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3576     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3577 
3578     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3579     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3580 
3581     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3582 
3583   } else { /* call == MAT_INITIAL_MATRIX */
3584     PetscBool flg;
3585 
3586     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3587     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3588 
3589     /* (1) iscol -> nonscalable iscol_local */
3590     /* Check for special case: each processor gets entire matrix columns */
3591     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3592     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3593     if (allcolumns) {
3594       iscol_sub = iscol_local;
3595       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3596       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3597 
3598     } else {
3599       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it can have duplicate indices */
3600       PetscInt *idx,*cmap1,k;
3601       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3602       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3603       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3604       count = 0;
3605       k     = 0;
3606       for (i=0; i<Ncols; i++) {
3607         j = is_idx[i];
3608         if (j >= cstart && j < cend) {
3609           /* diagonal part of mat */
3610           idx[count]     = j;
3611           cmap1[count++] = i; /* column index in submat */
3612         } else if (Bn) {
3613           /* off-diagonal part of mat */
3614           if (j == garray[k]) {
3615             idx[count]     = j;
3616             cmap1[count++] = i;  /* column index in submat */
3617           } else if (j > garray[k]) {
3618             while (j > garray[k] && k < Bn-1) k++;
3619             if (j == garray[k]) {
3620               idx[count]     = j;
3621               cmap1[count++] = i; /* column index in submat */
3622             }
3623           }
3624         }
3625       }
3626       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3627 
3628       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3629       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3630       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3631 
3632       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3633     }
3634 
3635     /* (3) Create sequential Msub */
3636     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3637   }
3638 
3639   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3640   aij  = (Mat_SeqAIJ*)(Msub)->data;
3641   ii   = aij->i;
3642   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3643 
3644   /*
3645       m - number of local rows
3646       Ncols - number of columns (same on all processors)
3647       rstart - first row in new global matrix generated
3648   */
3649   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3650 
3651   if (call == MAT_INITIAL_MATRIX) {
3652     /* (4) Create parallel newmat */
3653     PetscMPIInt    rank,size;
3654     PetscInt       csize;
3655 
3656     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3657     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3658 
3659     /*
3660         Determine the number of non-zeros in the diagonal and off-diagonal
3661         portions of the matrix in order to do correct preallocation
3662     */
3663 
3664     /* first get start and end of "diagonal" columns */
3665     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3666     if (csize == PETSC_DECIDE) {
3667       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3668       if (mglobal == Ncols) { /* square matrix */
3669         nlocal = m;
3670       } else {
3671         nlocal = Ncols/size + ((Ncols % size) > rank);
3672       }
3673     } else {
3674       nlocal = csize;
3675     }
3676     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3677     rstart = rend - nlocal;
3678     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3679 
3680     /* next, compute all the lengths */
3681     jj    = aij->j;
3682     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3683     olens = dlens + m;
3684     for (i=0; i<m; i++) {
3685       jend = ii[i+1] - ii[i];
3686       olen = 0;
3687       dlen = 0;
3688       for (j=0; j<jend; j++) {
3689         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3690         else dlen++;
3691         jj++;
3692       }
3693       olens[i] = olen;
3694       dlens[i] = dlen;
3695     }
3696 
3697     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3698     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3699 
3700     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3701     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3702     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3703     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3704     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3705     ierr = PetscFree(dlens);CHKERRQ(ierr);
3706 
3707   } else { /* call == MAT_REUSE_MATRIX */
3708     M    = *newmat;
3709     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3710     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3711     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3712     /*
3713          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3714        rather than the slower MatSetValues().
3715     */
3716     M->was_assembled = PETSC_TRUE;
3717     M->assembled     = PETSC_FALSE;
3718   }
3719 
3720   /* (5) Set values of Msub to *newmat */
3721   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3722   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3723 
3724   jj   = aij->j;
3725   aa   = aij->a;
3726   for (i=0; i<m; i++) {
3727     row = rstart + i;
3728     nz  = ii[i+1] - ii[i];
3729     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3730     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3731     jj += nz; aa += nz;
3732   }
3733   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3734 
3735   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3736   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3737 
3738   ierr = PetscFree(colsub);CHKERRQ(ierr);
3739 
3740   /* save Msub, iscol_sub and iscmap used in processor for next request */
3741   if (call ==  MAT_INITIAL_MATRIX) {
3742     *newmat = M;
3743     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3744     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3745 
3746     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3747     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3748 
3749     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3750     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3751 
3752     if (iscol_local) {
3753       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3754       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3755     }
3756   }
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 /*
3761     Not great since it makes two copies of the submatrix: first a SeqAIJ
3762   locally, and then the end result by concatenating the local matrices.
3763   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3764 
3765   Note: This requires a sequential iscol with all indices.
3766 */
3767 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3768 {
3769   PetscErrorCode ierr;
3770   PetscMPIInt    rank,size;
3771   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3772   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3773   Mat            M,Mreuse;
3774   MatScalar      *aa,*vwork;
3775   MPI_Comm       comm;
3776   Mat_SeqAIJ     *aij;
3777   PetscBool      colflag,allcolumns=PETSC_FALSE;
3778 
3779   PetscFunctionBegin;
3780   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3781   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3782   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3783 
3784   /* Check for special case: each processor gets entire matrix columns */
3785   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3786   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3787   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3788 
3789   if (call ==  MAT_REUSE_MATRIX) {
3790     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3791     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3792     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3793   } else {
3794     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3795   }
3796 
3797   /*
3798       m - number of local rows
3799       n - number of columns (same on all processors)
3800       rstart - first row in new global matrix generated
3801   */
3802   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3803   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3804   if (call == MAT_INITIAL_MATRIX) {
3805     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3806     ii  = aij->i;
3807     jj  = aij->j;
3808 
3809     /*
3810         Determine the number of non-zeros in the diagonal and off-diagonal
3811         portions of the matrix in order to do correct preallocation
3812     */
3813 
3814     /* first get start and end of "diagonal" columns */
3815     if (csize == PETSC_DECIDE) {
3816       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3817       if (mglobal == n) { /* square matrix */
3818         nlocal = m;
3819       } else {
3820         nlocal = n/size + ((n % size) > rank);
3821       }
3822     } else {
3823       nlocal = csize;
3824     }
3825     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3826     rstart = rend - nlocal;
3827     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3828 
3829     /* next, compute all the lengths */
3830     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3831     olens = dlens + m;
3832     for (i=0; i<m; i++) {
3833       jend = ii[i+1] - ii[i];
3834       olen = 0;
3835       dlen = 0;
3836       for (j=0; j<jend; j++) {
3837         if (*jj < rstart || *jj >= rend) olen++;
3838         else dlen++;
3839         jj++;
3840       }
3841       olens[i] = olen;
3842       dlens[i] = dlen;
3843     }
3844     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3845     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3846     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3847     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3848     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3849     ierr = PetscFree(dlens);CHKERRQ(ierr);
3850   } else {
3851     PetscInt ml,nl;
3852 
3853     M    = *newmat;
3854     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3855     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3856     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3857     /*
3858          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3859        rather than the slower MatSetValues().
3860     */
3861     M->was_assembled = PETSC_TRUE;
3862     M->assembled     = PETSC_FALSE;
3863   }
3864   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3865   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3866   ii   = aij->i;
3867   jj   = aij->j;
3868   aa   = aij->a;
3869   for (i=0; i<m; i++) {
3870     row   = rstart + i;
3871     nz    = ii[i+1] - ii[i];
3872     cwork = jj;     jj += nz;
3873     vwork = aa;     aa += nz;
3874     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3875   }
3876 
3877   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3878   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   *newmat = M;
3880 
3881   /* save submatrix used in processor for next request */
3882   if (call ==  MAT_INITIAL_MATRIX) {
3883     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3884     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3885   }
3886   PetscFunctionReturn(0);
3887 }
3888 
3889 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3890 {
3891   PetscInt       m,cstart, cend,j,nnz,i,d;
3892   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3893   const PetscInt *JJ;
3894   PetscScalar    *values;
3895   PetscErrorCode ierr;
3896   PetscBool      nooffprocentries;
3897 
3898   PetscFunctionBegin;
3899   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3900 
3901   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3902   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3903   m      = B->rmap->n;
3904   cstart = B->cmap->rstart;
3905   cend   = B->cmap->rend;
3906   rstart = B->rmap->rstart;
3907 
3908   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3909 
3910 #if defined(PETSC_USE_DEBUG)
3911   for (i=0; i<m && Ii; i++) {
3912     nnz = Ii[i+1]- Ii[i];
3913     JJ  = J + Ii[i];
3914     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3915     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3916     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3917   }
3918 #endif
3919 
3920   for (i=0; i<m && Ii; i++) {
3921     nnz     = Ii[i+1]- Ii[i];
3922     JJ      = J + Ii[i];
3923     nnz_max = PetscMax(nnz_max,nnz);
3924     d       = 0;
3925     for (j=0; j<nnz; j++) {
3926       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3927     }
3928     d_nnz[i] = d;
3929     o_nnz[i] = nnz - d;
3930   }
3931   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3932   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3933 
3934   if (v) values = (PetscScalar*)v;
3935   else {
3936     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3937   }
3938 
3939   for (i=0; i<m && Ii; i++) {
3940     ii   = i + rstart;
3941     nnz  = Ii[i+1]- Ii[i];
3942     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3943   }
3944   nooffprocentries    = B->nooffprocentries;
3945   B->nooffprocentries = PETSC_TRUE;
3946   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3947   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3948   B->nooffprocentries = nooffprocentries;
3949 
3950   if (!v) {
3951     ierr = PetscFree(values);CHKERRQ(ierr);
3952   }
3953   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3954   PetscFunctionReturn(0);
3955 }
3956 
3957 /*@
3958    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3959    (the default parallel PETSc format).
3960 
3961    Collective on MPI_Comm
3962 
3963    Input Parameters:
3964 +  B - the matrix
3965 .  i - the indices into j for the start of each local row (starts with zero)
3966 .  j - the column indices for each local row (starts with zero)
3967 -  v - optional values in the matrix
3968 
3969    Level: developer
3970 
3971    Notes:
3972        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3973      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3974      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3975 
3976        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3977 
3978        The format used for the sparse matrix input is equivalent to a
3979     row-major ordering, i.e., for the following matrix, the input data expected is
3980     as shown
3981 
3982 $        1 0 0
3983 $        2 0 3     P0
3984 $       -------
3985 $        4 5 6     P1
3986 $
3987 $     Process0 [P0]: rows_owned=[0,1]
3988 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3989 $        j =  {0,0,2}  [size = 3]
3990 $        v =  {1,2,3}  [size = 3]
3991 $
3992 $     Process1 [P1]: rows_owned=[2]
3993 $        i =  {0,3}    [size = nrow+1  = 1+1]
3994 $        j =  {0,1,2}  [size = 3]
3995 $        v =  {4,5,6}  [size = 3]
3996 
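     As a sketch, process 0 in the example above (run on two MPI processes) could pass its
     CSR arrays as follows; the matrix and array names here are illustrative only:

.vb
      Mat         B;
      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};

      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetType(B,MATMPIAIJ);
      MatSetSizes(B,2,PETSC_DECIDE,3,3);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
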
3997 .keywords: matrix, aij, compressed row, sparse, parallel
3998 
3999 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4000           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4001 @*/
4002 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4003 {
4004   PetscErrorCode ierr;
4005 
4006   PetscFunctionBegin;
4007   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4008   PetscFunctionReturn(0);
4009 }
4010 
4011 /*@C
4012    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4013    (the default parallel PETSc format).  For good matrix assembly performance
4014    the user should preallocate the matrix storage by setting the parameters
4015    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4016    performance can be increased by more than a factor of 50.
4017 
4018    Collective on MPI_Comm
4019 
4020    Input Parameters:
4021 +  B - the matrix
4022 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4023            (same value is used for all local rows)
4024 .  d_nnz - array containing the number of nonzeros in the various rows of the
4025            DIAGONAL portion of the local submatrix (possibly different for each row)
4026            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4027            The size of this array is equal to the number of local rows, i.e 'm'.
4028            For matrices that will be factored, you must leave room for (and set)
4029            the diagonal entry even if it is zero.
4030 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4031            submatrix (same value is used for all local rows).
4032 -  o_nnz - array containing the number of nonzeros in the various rows of the
4033            OFF-DIAGONAL portion of the local submatrix (possibly different for
4034            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4035            structure. The size of this array is equal to the number
4036            of local rows, i.e 'm'.
4037 
4038    If the *_nnz parameter is given then the *_nz parameter is ignored
4039 
4040    The AIJ format (also called the Yale sparse matrix format or
4041    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4042    storage.  The stored row and column indices begin with zero.
4043    See Users-Manual: ch_mat for details.
4044 
4045    The parallel matrix is partitioned such that the first m0 rows belong to
4046    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4047    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4048 
4049    The DIAGONAL portion of the local submatrix of a processor can be defined
4050    as the submatrix obtained by extracting the part corresponding to
4051    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4052    first row that belongs to the processor, r2 is the last row belonging to
4053    this processor, and c1-c2 is the range of indices of the local part of a
4054    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4055    common case of a square matrix, the row and column ranges are the same and
4056    the DIAGONAL part is also square. The remaining portion of the local
4057    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4058 
4059    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4060 
4061    You can call MatGetInfo() to get information on how effective the preallocation was;
4062    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4063    You can also run with the option -info and look for messages with the string
4064    malloc in them to see if additional memory allocation was needed.
4065 
4066    Example usage:
4067 
4068    Consider the following 8x8 matrix with 34 non-zero values, assembled
4069    across 3 processors. Let's assume that proc0 owns 3 rows,
4070    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4071    as follows:
4072 
4073 .vb
4074             1  2  0  |  0  3  0  |  0  4
4075     Proc0   0  5  6  |  7  0  0  |  8  0
4076             9  0 10  | 11  0  0  | 12  0
4077     -------------------------------------
4078            13  0 14  | 15 16 17  |  0  0
4079     Proc1   0 18  0  | 19 20 21  |  0  0
4080             0  0  0  | 22 23  0  | 24  0
4081     -------------------------------------
4082     Proc2  25 26 27  |  0  0 28  | 29  0
4083            30  0  0  | 31 32 33  |  0 34
4084 .ve
4085 
4086    This can be represented as a collection of submatrices as:
4087 
4088 .vb
4089       A B C
4090       D E F
4091       G H I
4092 .ve
4093 
4094    Where the submatrices A,B,C are owned by proc0, D,E,F are
4095    owned by proc1, G,H,I are owned by proc2.
4096 
4097    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4098    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4099    The 'M','N' parameters are 8,8, and have the same values on all procs.
4100 
4101    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4102    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4103    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4104    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4105    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4106    matrix, and [DF] as another SeqAIJ matrix.
4107 
4108    When d_nz, o_nz parameters are specified, d_nz storage elements are
4109    allocated for every row of the local diagonal submatrix, and o_nz
4110    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4111    One way to choose d_nz and o_nz is to use the max nonzeros per local
4112    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4113    In this case, the values of d_nz,o_nz are:
4114 .vb
4115      proc0 : dnz = 2, o_nz = 2
4116      proc1 : dnz = 3, o_nz = 2
4117      proc2 : dnz = 1, o_nz = 4
4118 .ve
4119    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4120    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4121    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4122    34 values.
4123 
4124    When d_nnz, o_nnz parameters are specified, the storage is specified
4125    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4126    In the above case the values for d_nnz,o_nnz are:
4127 .vb
4128      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4129      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4130      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4131 .ve
4132    Here the space allocated is the sum of all the above values, i.e., 34, and
4133    hence the preallocation is perfect.
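
   As a sketch, the preallocation above could be performed on proc0 as follows; the
   variable names are illustrative, and proc1 and proc2 would pass their own local sizes
   and d_nnz/o_nnz arrays:

.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetType(A,MATMPIAIJ);
      MatSetSizes(A,3,3,8,8);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve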
4134 
4135    Level: intermediate
4136 
4137 .keywords: matrix, aij, compressed row, sparse, parallel
4138 
4139 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4140           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4141 @*/
4142 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4143 {
4144   PetscErrorCode ierr;
4145 
4146   PetscFunctionBegin;
4147   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4148   PetscValidType(B,1);
4149   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4150   PetscFunctionReturn(0);
4151 }
4152 
4153 /*@
4154      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4155          CSR format the local rows.
4156 
4157    Collective on MPI_Comm
4158 
4159    Input Parameters:
4160 +  comm - MPI communicator
4161 .  m - number of local rows (Cannot be PETSC_DECIDE)
4162 .  n - This value should be the same as the local size used in creating the
4163        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4164        calculated if N is given). For square matrices n is almost always m.
4165 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4166 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4167 .   i - row indices
4168 .   j - column indices
4169 -   a - matrix values
4170 
4171    Output Parameter:
4172 .   mat - the matrix
4173 
4174    Level: intermediate
4175 
4176    Notes:
4177        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4178      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4179      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4180 
4181        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4182 
4183        The format used for the sparse matrix input is equivalent to a
4184     row-major ordering, i.e., for the following matrix, the input data expected is
4185     as shown
4186 
4187 $        1 0 0
4188 $        2 0 3     P0
4189 $       -------
4190 $        4 5 6     P1
4191 $
4192 $     Process0 [P0]: rows_owned=[0,1]
4193 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4194 $        j =  {0,0,2}  [size = 3]
4195 $        v =  {1,2,3}  [size = 3]
4196 $
4197 $     Process1 [P1]: rows_owned=[2]
4198 $        i =  {0,3}    [size = nrow+1  = 1+1]
4199 $        j =  {0,1,2}  [size = 3]
4200 $        v =  {4,5,6}  [size = 3]
4201 
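     As a sketch, process 0 in the example above (run on two MPI processes) could create
     the matrix from its CSR arrays as follows; process 1 would pass m=1 and its own
     arrays, and the names here are illustrative only:

.vb
      Mat         A;
      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
      PetscScalar a[] = {1.0,2.0,3.0};

      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve
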
4202 .keywords: matrix, aij, compressed row, sparse, parallel
4203 
4204 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4205           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4206 @*/
4207 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4208 {
4209   PetscErrorCode ierr;
4210 
4211   PetscFunctionBegin;
4212   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4213   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4214   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4215   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4216   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4217   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4218   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4219   PetscFunctionReturn(0);
4220 }
4221 
4222 /*@C
4223    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4224    (the default parallel PETSc format).  For good matrix assembly performance
4225    the user should preallocate the matrix storage by setting the parameters
4226    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4227    performance can be increased by more than a factor of 50.
4228 
4229    Collective on MPI_Comm
4230 
4231    Input Parameters:
4232 +  comm - MPI communicator
4233 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4234            This value should be the same as the local size used in creating the
4235            y vector for the matrix-vector product y = Ax.
4236 .  n - This value should be the same as the local size used in creating the
4237        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4238        calculated if N is given). For square matrices n is almost always m.
4239 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4240 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4241 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4242            (same value is used for all local rows)
4243 .  d_nnz - array containing the number of nonzeros in the various rows of the
4244            DIAGONAL portion of the local submatrix (possibly different for each row)
4245            or NULL, if d_nz is used to specify the nonzero structure.
4246            The size of this array is equal to the number of local rows, i.e 'm'.
4247 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4248            submatrix (same value is used for all local rows).
4249 -  o_nnz - array containing the number of nonzeros in the various rows of the
4250            OFF-DIAGONAL portion of the local submatrix (possibly different for
4251            each row) or NULL, if o_nz is used to specify the nonzero
4252            structure. The size of this array is equal to the number
4253            of local rows, i.e 'm'.
4254 
4255    Output Parameter:
4256 .  A - the matrix
4257 
4258    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4259    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4260    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4261 
4262    Notes:
4263    If the *_nnz parameter is given then the *_nz parameter is ignored
4264 
4265    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4266    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4267    storage requirements for this matrix.
4268 
4269    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4270    processor then it must be used on all processors that share the object for
4271    that argument.
4272 
4273    The user MUST specify either the local or global matrix dimensions
4274    (possibly both).
4275 
4276    The parallel matrix is partitioned across processors such that the
4277    first m0 rows belong to process 0, the next m1 rows belong to
4278    process 1, the next m2 rows belong to process 2 etc.. where
4279    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4280    values corresponding to [m x N] submatrix.
4281 
4282    The columns are logically partitioned with the n0 columns belonging
4283    to 0th partition, the next n1 columns belonging to the next
4284    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4285 
4286    The DIAGONAL portion of the local submatrix on any given processor
4287    is the submatrix spanning the rows and columns m,n owned by
4288    the given processor, i.e., the diagonal matrix on
4289    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4290    etc. The remaining portion of the local submatrix [m x (N-n)]
4291    constitutes the OFF-DIAGONAL portion. The example below better
4292    illustrates this concept.
4293 
4294    For a square global matrix we define each processor's diagonal portion
4295    to be its local rows and the corresponding columns (a square submatrix);
4296    each processor's off-diagonal portion encompasses the remainder of the
4297    local matrix (a rectangular submatrix).
4298 
4299    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4300 
4301    When calling this routine with a single process communicator, a matrix of
4302    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4303    type of communicator, use the construction mechanism
4307 
4308 $     MatCreate(...,&A);
4309 $     MatSetType(A,MATMPIAIJ);
4310 $     MatSetSizes(A, m,n,M,N);
4311 $     MatMPIAIJSetPreallocation(A,...);
4312 
4313    By default, this format uses inodes (identical nodes) when possible.
4314    We search for consecutive rows with the same nonzero structure, thereby
4315    reusing matrix information to achieve increased efficiency.
4316 
4317    Options Database Keys:
4318 +  -mat_no_inode  - Do not use inodes
4319 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4320 
4321 
4322 
4323    Example usage:
4324 
4325    Consider the following 8x8 matrix with 34 non-zero values, assembled
4326    across 3 processors. Let's assume that proc0 owns 3 rows,
4327    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4328    as follows
4329 
4330 .vb
4331             1  2  0  |  0  3  0  |  0  4
4332     Proc0   0  5  6  |  7  0  0  |  8  0
4333             9  0 10  | 11  0  0  | 12  0
4334     -------------------------------------
4335            13  0 14  | 15 16 17  |  0  0
4336     Proc1   0 18  0  | 19 20 21  |  0  0
4337             0  0  0  | 22 23  0  | 24  0
4338     -------------------------------------
4339     Proc2  25 26 27  |  0  0 28  | 29  0
4340            30  0  0  | 31 32 33  |  0 34
4341 .ve
4342 
4343    This can be represented as a collection of submatrices as
4344 
4345 .vb
4346       A B C
4347       D E F
4348       G H I
4349 .ve
4350 
4351    Where the submatrices A,B,C are owned by proc0, D,E,F are
4352    owned by proc1, G,H,I are owned by proc2.
4353 
4354    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4355    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4356    The 'M','N' parameters are 8,8, and have the same values on all procs.
4357 
4358    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4359    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4360    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4361    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4362    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4363    matrix, and [DF] as another SeqAIJ matrix.
4364 
4365    When d_nz, o_nz parameters are specified, d_nz storage elements are
4366    allocated for every row of the local diagonal submatrix, and o_nz
4367    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4368    One way to choose d_nz and o_nz is to use the max nonzeros per local
4369    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4370    In this case, the values of d_nz,o_nz are
4371 .vb
4372      proc0 : dnz = 2, o_nz = 2
4373      proc1 : dnz = 3, o_nz = 2
4374      proc2 : dnz = 1, o_nz = 4
4375 .ve
4376    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4377    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4378    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4379    34 values.
4380 
4381    When d_nnz, o_nnz parameters are specified, the storage is specified
4382    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4383    In the above case the values for d_nnz,o_nnz are
4384 .vb
4385      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4386      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4387      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4388 .ve
4389    Here the space allocated is the sum of all the above values, i.e., 34, and
4390    hence the preallocation is perfect.
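
   As a sketch, proc0 in the example above could create the matrix with the single call
   below; the names are illustrative, and each process passes its own local sizes and
   d_nnz/o_nnz arrays:

.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve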
4391 
4392    Level: intermediate
4393 
4394 .keywords: matrix, aij, compressed row, sparse, parallel
4395 
4396 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4397           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4398 @*/
4399 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4400 {
4401   PetscErrorCode ierr;
4402   PetscMPIInt    size;
4403 
4404   PetscFunctionBegin;
4405   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4406   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4407   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4408   if (size > 1) {
4409     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4410     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4411   } else {
4412     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4413     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4414   }
4415   PetscFunctionReturn(0);
4416 }
4417 
4418 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4419 {
4420   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4421   PetscBool      flg;
4422   PetscErrorCode ierr;
4423 
4424   PetscFunctionBegin;
4425   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4426   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4427   if (Ad)     *Ad     = a->A;
4428   if (Ao)     *Ao     = a->B;
4429   if (colmap) *colmap = a->garray;
4430   PetscFunctionReturn(0);
4431 }
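
/*
   A minimal usage sketch for MatMPIAIJGetSeqAIJ(); it assumes A is an assembled MATMPIAIJ matrix:

      Mat            Ad,Ao;
      const PetscInt *colmap;
      PetscErrorCode ierr;

      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);

   Ad is the local diagonal block, Ao is the off-diagonal block, and colmap[] maps the
   compressed column indices of Ao to global column numbers.
*/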
4432 
4433 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4434 {
4435   PetscErrorCode ierr;
4436   PetscInt       m,N,i,rstart,nnz,Ii;
4437   PetscInt       *indx;
4438   PetscScalar    *values;
4439 
4440   PetscFunctionBegin;
4441   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4442   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4443     PetscInt       *dnz,*onz,sum,bs,cbs;
4444 
4445     if (n == PETSC_DECIDE) {
4446       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4447     }
4448     /* Check sum(n) = N */
4449     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4450     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4451 
4452     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4453     rstart -= m;
4454 
4455     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4456     for (i=0; i<m; i++) {
4457       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4458       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4459       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4460     }
4461 
4462     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4463     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4464     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4465     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4466     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4467     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4468     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4469     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4470   }
4471 
4472   /* numeric phase */
4473   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4474   for (i=0; i<m; i++) {
4475     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4476     Ii   = i + rstart;
4477     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4478     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4479   }
4480   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4481   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4482   PetscFunctionReturn(0);
4483 }
4484 
4485 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4486 {
4487   PetscErrorCode    ierr;
4488   PetscMPIInt       rank;
4489   PetscInt          m,N,i,rstart,nnz;
4490   size_t            len;
4491   const PetscInt    *indx;
4492   PetscViewer       out;
4493   char              *name;
4494   Mat               B;
4495   const PetscScalar *values;
4496 
4497   PetscFunctionBegin;
4498   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4499   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4500   /* Should this be the type of the diagonal block of A? */
4501   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4502   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4503   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4504   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4505   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4506   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4507   for (i=0; i<m; i++) {
4508     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4509     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4510     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4511   }
4512   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4513   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4514 
4515   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4516   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4517   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4518   sprintf(name,"%s.%d",outfile,rank);
4519   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4520   ierr = PetscFree(name);CHKERRQ(ierr);
4521   ierr = MatView(B,out);CHKERRQ(ierr);
4522   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4523   ierr = MatDestroy(&B);CHKERRQ(ierr);
4524   PetscFunctionReturn(0);
4525 }
4526 
4527 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4528 {
4529   PetscErrorCode      ierr;
4530   Mat_Merge_SeqsToMPI *merge;
4531   PetscContainer      container;
4532 
4533   PetscFunctionBegin;
4534   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4535   if (container) {
4536     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4537     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4538     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4539     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4540     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4541     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4542     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4543     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4549     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4550     ierr = PetscFree(merge);CHKERRQ(ierr);
4551     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4552   }
4553   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4554   PetscFunctionReturn(0);
4555 }
4556 
4557 #include <../src/mat/utils/freespace.h>
4558 #include <petscbt.h>
4559 
4560 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4561 {
4562   PetscErrorCode      ierr;
4563   MPI_Comm            comm;
4564   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4565   PetscMPIInt         size,rank,taga,*len_s;
4566   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4567   PetscInt            proc,m;
4568   PetscInt            **buf_ri,**buf_rj;
4569   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4570   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4571   MPI_Request         *s_waits,*r_waits;
4572   MPI_Status          *status;
4573   MatScalar           *aa=a->a;
4574   MatScalar           **abuf_r,*ba_i;
4575   Mat_Merge_SeqsToMPI *merge;
4576   PetscContainer      container;
4577 
4578   PetscFunctionBegin;
4579   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4580   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4581 
4582   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4583   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4584 
4585   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4586   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4587 
4588   bi     = merge->bi;
4589   bj     = merge->bj;
4590   buf_ri = merge->buf_ri;
4591   buf_rj = merge->buf_rj;
4592 
4593   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4594   owners = merge->rowmap->range;
4595   len_s  = merge->len_s;
4596 
4597   /* send and recv matrix values */
4598   /*-----------------------------*/
4599   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4600   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4601 
4602   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4603   for (proc=0,k=0; proc<size; proc++) {
4604     if (!len_s[proc]) continue;
4605     i    = owners[proc];
4606     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4607     k++;
4608   }
4609 
4610   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4611   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4612   ierr = PetscFree(status);CHKERRQ(ierr);
4613 
4614   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4615   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4616 
4617   /* insert mat values of mpimat */
4618   /*----------------------------*/
4619   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4620   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4621 
4622   for (k=0; k<merge->nrecv; k++) {
4623     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4624     nrows       = *(buf_ri_k[k]);
4625     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4626     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4627   }
4628 
4629   /* set values of ba */
4630   m = merge->rowmap->n;
4631   for (i=0; i<m; i++) {
4632     arow = owners[rank] + i;
4633     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4634     bnzi = bi[i+1] - bi[i];
4635     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4636 
4637     /* add local non-zero vals of this proc's seqmat into ba */
4638     anzi   = ai[arow+1] - ai[arow];
4639     aj     = a->j + ai[arow];
4640     aa     = a->a + ai[arow];
4641     nextaj = 0;
4642     for (j=0; nextaj<anzi; j++) {
4643       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4644         ba_i[j] += aa[nextaj++];
4645       }
4646     }
4647 
4648     /* add received vals into ba */
4649     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4650       /* i-th row */
4651       if (i == *nextrow[k]) {
4652         anzi   = *(nextai[k]+1) - *nextai[k];
4653         aj     = buf_rj[k] + *(nextai[k]);
4654         aa     = abuf_r[k] + *(nextai[k]);
4655         nextaj = 0;
4656         for (j=0; nextaj<anzi; j++) {
4657           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4658             ba_i[j] += aa[nextaj++];
4659           }
4660         }
4661         nextrow[k]++; nextai[k]++;
4662       }
4663     }
4664     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4665   }
4666   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4667   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4668 
4669   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4670   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4671   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4672   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4673   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4674   PetscFunctionReturn(0);
4675 }
4676 
4677 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4678 {
4679   PetscErrorCode      ierr;
4680   Mat                 B_mpi;
4681   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4682   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4683   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4684   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4685   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4686   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4687   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4688   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4689   MPI_Status          *status;
4690   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4691   PetscBT             lnkbt;
4692   Mat_Merge_SeqsToMPI *merge;
4693   PetscContainer      container;
4694 
4695   PetscFunctionBegin;
4696   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4697 
4698   /* make sure it is a PETSc comm */
4699   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4700   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4701   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4702 
4703   ierr = PetscNew(&merge);CHKERRQ(ierr);
4704   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4705 
4706   /* determine row ownership */
4707   /*---------------------------------------------------------*/
4708   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4709   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4710   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4711   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4712   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4713   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4714   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4715 
4716   m      = merge->rowmap->n;
4717   owners = merge->rowmap->range;
4718 
4719   /* determine the number of messages to send, their lengths */
4720   /*---------------------------------------------------------*/
4721   len_s = merge->len_s;
4722 
4723   len          = 0; /* length of buf_si[] */
4724   merge->nsend = 0;
4725   for (proc=0; proc<size; proc++) {
4726     len_si[proc] = 0;
4727     if (proc == rank) {
4728       len_s[proc] = 0;
4729     } else {
4730       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4731       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4732     }
4733     if (len_s[proc]) {
4734       merge->nsend++;
4735       nrows = 0;
4736       for (i=owners[proc]; i<owners[proc+1]; i++) {
4737         if (ai[i+1] > ai[i]) nrows++;
4738       }
4739       len_si[proc] = 2*(nrows+1);
4740       len         += len_si[proc];
4741     }
4742   }
4743 
4744   /* determine the number and length of messages to receive for ij-structure */
4745   /*-------------------------------------------------------------------------*/
4746   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4747   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4748 
4749   /* post the Irecv of j-structure */
4750   /*-------------------------------*/
4751   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4752   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4753 
4754   /* post the Isend of j-structure */
4755   /*--------------------------------*/
4756   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4757 
4758   for (proc=0, k=0; proc<size; proc++) {
4759     if (!len_s[proc]) continue;
4760     i    = owners[proc];
4761     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4762     k++;
4763   }
4764 
4765   /* receives and sends of j-structure are complete */
4766   /*------------------------------------------------*/
4767   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4768   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4769 
4770   /* send and recv i-structure */
4771   /*---------------------------*/
4772   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4773   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4774 
4775   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4776   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4777   for (proc=0,k=0; proc<size; proc++) {
4778     if (!len_s[proc]) continue;
4779     /* form outgoing message for i-structure:
4780          buf_si[0]:                 nrows to be sent
4781                [1:nrows]:           row index (global)
4782                [nrows+1:2*nrows+1]: i-structure index
4783     */
4784     /*-------------------------------------------*/
4785     nrows       = len_si[proc]/2 - 1;
4786     buf_si_i    = buf_si + nrows+1;
4787     buf_si[0]   = nrows;
4788     buf_si_i[0] = 0;
4789     nrows       = 0;
4790     for (i=owners[proc]; i<owners[proc+1]; i++) {
4791       anzi = ai[i+1] - ai[i];
4792       if (anzi) {
4793         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4794         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4795         nrows++;
4796       }
4797     }
4798     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4799     k++;
4800     buf_si += len_si[proc];
4801   }
4802 
4803   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4804   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4805 
4806   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4807   for (i=0; i<merge->nrecv; i++) {
4808     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4809   }
4810 
4811   ierr = PetscFree(len_si);CHKERRQ(ierr);
4812   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4813   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4814   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4815   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4816   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4817   ierr = PetscFree(status);CHKERRQ(ierr);
4818 
4819   /* compute a local seq matrix in each processor */
4820   /*----------------------------------------------*/
4821   /* allocate bi array and free space for accumulating nonzero column info */
4822   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4823   bi[0] = 0;
4824 
4825   /* create and initialize a linked list */
4826   nlnk = N+1;
4827   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4828 
4829   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4830   len  = ai[owners[rank+1]] - ai[owners[rank]];
4831   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4832 
4833   current_space = free_space;
4834 
4835   /* determine symbolic info for each local row */
4836   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4837 
4838   for (k=0; k<merge->nrecv; k++) {
4839     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4840     nrows       = *buf_ri_k[k];
4841     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4842     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4843   }
4844 
4845   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4846   len  = 0;
4847   for (i=0; i<m; i++) {
4848     bnzi = 0;
4849     /* add local non-zero cols of this proc's seqmat into lnk */
4850     arow  = owners[rank] + i;
4851     anzi  = ai[arow+1] - ai[arow];
4852     aj    = a->j + ai[arow];
4853     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4854     bnzi += nlnk;
4855     /* add received col data into lnk */
4856     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4857       if (i == *nextrow[k]) { /* i-th row */
4858         anzi  = *(nextai[k]+1) - *nextai[k];
4859         aj    = buf_rj[k] + *nextai[k];
4860         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4861         bnzi += nlnk;
4862         nextrow[k]++; nextai[k]++;
4863       }
4864     }
4865     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4866 
4867     /* if free space is not available, make more free space */
4868     if (current_space->local_remaining<bnzi) {
4869       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4870       nspacedouble++;
4871     }
4872     /* copy data into free space, then initialize lnk */
4873     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4874     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4875 
4876     current_space->array           += bnzi;
4877     current_space->local_used      += bnzi;
4878     current_space->local_remaining -= bnzi;
4879 
4880     bi[i+1] = bi[i] + bnzi;
4881   }
4882 
4883   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4884 
4885   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4886   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4887   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4888 
4889   /* create symbolic parallel matrix B_mpi */
4890   /*---------------------------------------*/
4891   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4892   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4893   if (n==PETSC_DECIDE) {
4894     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4895   } else {
4896     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4897   }
4898   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4899   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4900   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4901   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4902   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4903 
4904   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4905   B_mpi->assembled    = PETSC_FALSE;
4906   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4907   merge->bi           = bi;
4908   merge->bj           = bj;
4909   merge->buf_ri       = buf_ri;
4910   merge->buf_rj       = buf_rj;
4911   merge->coi          = NULL;
4912   merge->coj          = NULL;
4913   merge->owners_co    = NULL;
4914 
4915   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4916 
4917   /* attach the supporting struct to B_mpi for reuse */
4918   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4919   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4920   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4921   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4922   *mpimat = B_mpi;
4923 
4924   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 /*@C
4929       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4930                  matrices from each processor
4931 
4932     Collective on MPI_Comm
4933 
4934    Input Parameters:
4935 +    comm - the communicator the parallel matrix will live on
4936 .    seqmat - the input sequential matrix on each process
4937 .    m - number of local rows (or PETSC_DECIDE)
4938 .    n - number of local columns (or PETSC_DECIDE)
4939 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4940 
4941    Output Parameter:
4942 .    mpimat - the parallel matrix generated
4943 
4944     Level: advanced
4945 
4946    Notes:
4947      The dimensions of the sequential matrix on each processor MUST be the same.
4948      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4949      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
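
     A minimal calling sketch; it assumes each process holds a SeqAIJ matrix seqmat of
     identical global dimensions:

.vb
      Mat C;
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve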
4950 @*/
4951 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4952 {
4953   PetscErrorCode ierr;
4954   PetscMPIInt    size;
4955 
4956   PetscFunctionBegin;
4957   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4958   if (size == 1) {
4959     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4960     if (scall == MAT_INITIAL_MATRIX) {
4961       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4962     } else {
4963       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4964     }
4965     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4966     PetscFunctionReturn(0);
4967   }
4968   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4969   if (scall == MAT_INITIAL_MATRIX) {
4970     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4971   }
4972   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4973   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4974   PetscFunctionReturn(0);
4975 }
4976 
4977 /*@
4978      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4979           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4980           with MatGetSize().
4981 
4982     Not Collective
4983 
4984    Input Parameters:
4985 +    A - the matrix
4986 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4987 
4988    Output Parameter:
4989 .    A_loc - the local sequential matrix generated
4990 
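    A minimal usage sketch; it assumes A is an assembled MATMPIAIJ matrix:

.vb
      Mat A_loc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      ... use A_loc as a sequential matrix ...
      MatDestroy(&A_loc);
.ve
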
4991     Level: developer
4992 
4993 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4994 
4995 @*/
4996 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4997 {
4998   PetscErrorCode ierr;
4999   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5000   Mat_SeqAIJ     *mat,*a,*b;
5001   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5002   MatScalar      *aa,*ba,*cam;
5003   PetscScalar    *ca;
5004   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5005   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5006   PetscBool      match;
5007   MPI_Comm       comm;
5008   PetscMPIInt    size;
5009 
5010   PetscFunctionBegin;
5011   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5012   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5013   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5014   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5015   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5016 
5017   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5018   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5019   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5020   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5021   aa = a->a; ba = b->a;
5022   if (scall == MAT_INITIAL_MATRIX) {
5023     if (size == 1) {
5024       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5025       PetscFunctionReturn(0);
5026     }
5027 
5028     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5029     ci[0] = 0;
5030     for (i=0; i<am; i++) {
5031       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5032     }
5033     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5034     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5035     k    = 0;
5036     for (i=0; i<am; i++) {
5037       ncols_o = bi[i+1] - bi[i];
5038       ncols_d = ai[i+1] - ai[i];
5039       /* off-diagonal portion of A */
5040       for (jo=0; jo<ncols_o; jo++) {
5041         col = cmap[*bj];
5042         if (col >= cstart) break;
5043         cj[k]   = col; bj++;
5044         ca[k++] = *ba++;
5045       }
5046       /* diagonal portion of A */
5047       for (j=0; j<ncols_d; j++) {
5048         cj[k]   = cstart + *aj++;
5049         ca[k++] = *aa++;
5050       }
5051       /* off-diagonal portion of A */
5052       for (j=jo; j<ncols_o; j++) {
5053         cj[k]   = cmap[*bj++];
5054         ca[k++] = *ba++;
5055       }
5056     }
5057     /* put together the new matrix */
5058     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5059     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5060     /* Since these are PETSc arrays, change flags to free them as necessary. */
5061     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5062     mat->free_a  = PETSC_TRUE;
5063     mat->free_ij = PETSC_TRUE;
5064     mat->nonew   = 0;
5065   } else if (scall == MAT_REUSE_MATRIX) {
5066     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5067     ci = mat->i; cj = mat->j; cam = mat->a;
5068     for (i=0; i<am; i++) {
5069       /* off-diagonal portion of A */
5070       ncols_o = bi[i+1] - bi[i];
5071       for (jo=0; jo<ncols_o; jo++) {
5072         col = cmap[*bj];
5073         if (col >= cstart) break;
5074         *cam++ = *ba++; bj++;
5075       }
5076       /* diagonal portion of A */
5077       ncols_d = ai[i+1] - ai[i];
5078       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5079       /* off-diagonal portion of A */
5080       for (j=jo; j<ncols_o; j++) {
5081         *cam++ = *ba++; bj++;
5082       }
5083     }
5084   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5085   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5086   PetscFunctionReturn(0);
5087 }
5088 
5089 /*@C
5090      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5091 
5092     Not Collective
5093 
5094    Input Parameters:
5095 +    A - the matrix
5096 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5097 -    row, col - index sets of rows and columns to extract (or NULL)
5098 
5099    Output Parameter:
5100 .    A_loc - the local sequential matrix generated
5101 
5102     Level: developer
5103 
5104 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5105 
5106 @*/
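/*
   A minimal usage sketch (an illustration, not taken from the PETSc test suite). It assumes A is an
   assembled MATMPIAIJ matrix and passes NULL for row and col so all local rows and all nonzero
   columns are taken:

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... A_loc has A->rmap->n rows and only the columns that contain a nonzero in some local row ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/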
5107 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5108 {
5109   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5110   PetscErrorCode ierr;
5111   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5112   IS             isrowa,iscola;
5113   Mat            *aloc;
5114   PetscBool      match;
5115 
5116   PetscFunctionBegin;
5117   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5118   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5119   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5120   if (!row) {
5121     start = A->rmap->rstart; end = A->rmap->rend;
5122     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5123   } else {
5124     isrowa = *row;
5125   }
5126   if (!col) {
5127     start = A->cmap->rstart;
5128     cmap  = a->garray;
5129     nzA   = a->A->cmap->n;
5130     nzB   = a->B->cmap->n;
5131     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5132     ncols = 0;
5133     for (i=0; i<nzB; i++) {
5134       if (cmap[i] < start) idx[ncols++] = cmap[i];
5135       else break;
5136     }
5137     imark = i;
5138     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5139     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5140     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5141   } else {
5142     iscola = *col;
5143   }
5144   if (scall != MAT_INITIAL_MATRIX) {
5145     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5146     aloc[0] = *A_loc;
5147   }
5148   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5149   if (!col) { /* attach global id of condensed columns */
5150     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5151   }
5152   *A_loc = aloc[0];
5153   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5154   if (!row) {
5155     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5156   }
5157   if (!col) {
5158     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5159   }
5160   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5161   PetscFunctionReturn(0);
5162 }
5163 
5164 /*@C
5165     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5166 
5167     Collective on Mat
5168 
5169    Input Parameters:
5170 +    A,B - the matrices in mpiaij format
5171 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5172 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5173 
5174    Output Parameters:
5175 +    rowb, colb - index sets of rows and columns of B to extract
5176 -    B_seq - the sequential matrix generated
5177 
5178     Level: developer
5179 
5180 @*/
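/*
   A minimal usage sketch (an illustration, not taken from the PETSc test suite). It assumes A and B are
   MATMPIAIJ matrices with compatible layouts (A's column layout matches B's row layout) and lets the
   routine create and return the index sets so they can be reused on a later call:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... later, after the values of B change (same nonzero pattern) ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/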
5181 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5182 {
5183   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5184   PetscErrorCode ierr;
5185   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5186   IS             isrowb,iscolb;
5187   Mat            *bseq=NULL;
5188 
5189   PetscFunctionBegin;
5190   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5191     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5192   }
5193   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5194 
5195   if (scall == MAT_INITIAL_MATRIX) {
5196     start = A->cmap->rstart;
5197     cmap  = a->garray;
5198     nzA   = a->A->cmap->n;
5199     nzB   = a->B->cmap->n;
5200     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5201     ncols = 0;
5202     for (i=0; i<nzB; i++) {  /* row < local row index */
5203       if (cmap[i] < start) idx[ncols++] = cmap[i];
5204       else break;
5205     }
5206     imark = i;
5207     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5208     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5209     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5210     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5211   } else {
5212     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5213     isrowb  = *rowb; iscolb = *colb;
5214     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5215     bseq[0] = *B_seq;
5216   }
5217   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5218   *B_seq = bseq[0];
5219   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5220   if (!rowb) {
5221     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5222   } else {
5223     *rowb = isrowb;
5224   }
5225   if (!colb) {
5226     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5227   } else {
5228     *colb = iscolb;
5229   }
5230   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5231   PetscFunctionReturn(0);
5232 }
5233 
5234 #include <petsc/private/vecscatterimpl.h>
5235 /*
5236     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5237     of the OFF-DIAGONAL portion of local A
5238 
5239     Collective on Mat
5240 
5241    Input Parameters:
5242 +    A,B - the matrices in mpiaij format
5243 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5244 
5245    Output Parameters:
5246 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5247 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5248 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5249 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5250 
5251     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5252      for this matrix. This is not desirable.
5253 
5254     Level: developer
5255 
5256 */
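/*
   A minimal calling sketch for the routine below (an illustration, not taken from the PETSc test suite).
   It assumes A and B are MATMPIAIJ matrices with compatible layouts; the startsj/bufa buffers returned by
   the MAT_INITIAL_MATRIX call are handed back for MAT_REUSE_MATRIX and eventually freed by the caller:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... later, after the values of B change (same nonzero pattern) ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/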
5257 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5258 {
5259   VecScatter_MPI_General *gen_to,*gen_from;
5260   PetscErrorCode         ierr;
5261   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5262   Mat_SeqAIJ             *b_oth;
5263   VecScatter             ctx;
5264   MPI_Comm               comm;
5265   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5266   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5267   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5268   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5269   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5270   MPI_Request            *rwaits = NULL,*swaits = NULL;
5271   MPI_Status             *sstatus,rstatus;
5272   PetscMPIInt            jj,size;
5273   VecScatterType         type;
5274   PetscBool              mpi1;
5275 
5276   PetscFunctionBegin;
5277   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5278   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5279 
5280   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5281     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5282   }
5283   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5284   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5285 
5286   if (size == 1) {
5287     startsj_s = NULL;
5288     bufa_ptr  = NULL;
5289     *B_oth    = NULL;
5290     PetscFunctionReturn(0);
5291   }
5292 
5293   ctx = a->Mvctx;
5294   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5295   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5296   if (!mpi1) {
5297     /* a->Mvctx is not of type MPI1, which is the only type this routine supports,
5298      thus create a->Mvctx_mpi1 */
5299     if (!a->Mvctx_mpi1) {
5300       a->Mvctx_mpi1_flg = PETSC_TRUE;
5301       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5302     }
5303     ctx = a->Mvctx_mpi1;
5304   }
5305   tag = ((PetscObject)ctx)->tag;
5306 
5307   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5308   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5309   nrecvs   = gen_from->n;
5310   nsends   = gen_to->n;
5311 
5312   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5313   srow    = gen_to->indices;    /* local row index to be sent */
5314   sstarts = gen_to->starts;
5315   sprocs  = gen_to->procs;
5316   sstatus = gen_to->sstatus;
5317   sbs     = gen_to->bs;
5318   rstarts = gen_from->starts;
5319   rprocs  = gen_from->procs;
5320   rbs     = gen_from->bs;
5321 
5322   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5323   if (scall == MAT_INITIAL_MATRIX) {
5324     /* i-array */
5325     /*---------*/
5326     /*  post receives */
5327     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5328     for (i=0; i<nrecvs; i++) {
5329       rowlen = rvalues + rstarts[i]*rbs;
5330       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5331       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5332     }
5333 
5334     /* pack the outgoing message */
5335     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5336 
5337     sstartsj[0] = 0;
5338     rstartsj[0] = 0;
5339     len         = 0; /* total length of j or a array to be sent */
5340     k           = 0;
5341     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5342     for (i=0; i<nsends; i++) {
5343       rowlen = svalues + sstarts[i]*sbs;
5344       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5345       for (j=0; j<nrows; j++) {
5346         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5347         for (l=0; l<sbs; l++) {
5348           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5349 
5350           rowlen[j*sbs+l] = ncols;
5351 
5352           len += ncols;
5353           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5354         }
5355         k++;
5356       }
5357       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5358 
5359       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5360     }
5361     /* recvs and sends of i-array are completed */
5362     i = nrecvs;
5363     while (i--) {
5364       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5365     }
5366     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5367     ierr = PetscFree(svalues);CHKERRQ(ierr);
5368 
5369     /* allocate buffers for sending j and a arrays */
5370     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5371     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5372 
5373     /* create i-array of B_oth */
5374     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5375 
5376     b_othi[0] = 0;
5377     len       = 0; /* total length of j or a array to be received */
5378     k         = 0;
5379     for (i=0; i<nrecvs; i++) {
5380       rowlen = rvalues + rstarts[i]*rbs;
5381       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5382       for (j=0; j<nrows; j++) {
5383         b_othi[k+1] = b_othi[k] + rowlen[j];
5384         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5385         k++;
5386       }
5387       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5388     }
5389     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5390 
5391     /* allocate space for j and a arrays of B_oth */
5392     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5393     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5394 
5395     /* j-array */
5396     /*---------*/
5397     /*  post receives of j-array */
5398     for (i=0; i<nrecvs; i++) {
5399       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5400       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5401     }
5402 
5403     /* pack the outgoing message j-array */
5404     k = 0;
5405     for (i=0; i<nsends; i++) {
5406       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5407       bufJ  = bufj+sstartsj[i];
5408       for (j=0; j<nrows; j++) {
5409         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5410         for (ll=0; ll<sbs; ll++) {
5411           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5412           for (l=0; l<ncols; l++) {
5413             *bufJ++ = cols[l];
5414           }
5415           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5416         }
5417       }
5418       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5419     }
5420 
5421     /* recvs and sends of j-array are completed */
5422     i = nrecvs;
5423     while (i--) {
5424       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5425     }
5426     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5427   } else if (scall == MAT_REUSE_MATRIX) {
5428     sstartsj = *startsj_s;
5429     rstartsj = *startsj_r;
5430     bufa     = *bufa_ptr;
5431     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5432     b_otha   = b_oth->a;
5433   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5434 
5435   /* a-array */
5436   /*---------*/
5437   /*  post receives of a-array */
5438   for (i=0; i<nrecvs; i++) {
5439     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5440     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5441   }
5442 
5443   /* pack the outgoing message a-array */
5444   k = 0;
5445   for (i=0; i<nsends; i++) {
5446     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5447     bufA  = bufa+sstartsj[i];
5448     for (j=0; j<nrows; j++) {
5449       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5450       for (ll=0; ll<sbs; ll++) {
5451         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5452         for (l=0; l<ncols; l++) {
5453           *bufA++ = vals[l];
5454         }
5455         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5456       }
5457     }
5458     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5459   }
5460   /* recvs and sends of a-array are completed */
5461   i = nrecvs;
5462   while (i--) {
5463     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5464   }
5465   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5466   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5467 
5468   if (scall == MAT_INITIAL_MATRIX) {
5469     /* put together the new matrix */
5470     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5471 
5472     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5473     /* Since these are PETSc arrays, change flags to free them as necessary. */
5474     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5475     b_oth->free_a  = PETSC_TRUE;
5476     b_oth->free_ij = PETSC_TRUE;
5477     b_oth->nonew   = 0;
5478 
5479     ierr = PetscFree(bufj);CHKERRQ(ierr);
5480     if (!startsj_s || !bufa_ptr) {
5481       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5482       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5483     } else {
5484       *startsj_s = sstartsj;
5485       *startsj_r = rstartsj;
5486       *bufa_ptr  = bufa;
5487     }
5488   }
5489   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5490   PetscFunctionReturn(0);
5491 }
5492 
5493 /*@C
5494   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5495 
5496   Not Collective
5497 
5498   Input Parameter:
5499 . A - The matrix in mpiaij format
5500 
5501   Output Parameters:
5502 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5503 . colmap - A map from global column index to local index into lvec
5504 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5505 
5506   Level: developer
5507 
5508 @*/
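/*
   A minimal usage sketch (an illustration, not taken from the PETSc test suite). It assumes A is an
   assembled MATMPIAIJ matrix; the type of colmap depends on whether PETSc was configured with
   PETSC_USE_CTABLE, and all returned objects are owned by A and must not be destroyed or freed:

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/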
5509 #if defined(PETSC_USE_CTABLE)
5510 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5511 #else
5512 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5513 #endif
5514 {
5515   Mat_MPIAIJ *a;
5516 
5517   PetscFunctionBegin;
5518   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5519   PetscValidPointer(lvec, 2);
5520   PetscValidPointer(colmap, 3);
5521   PetscValidPointer(multScatter, 4);
5522   a = (Mat_MPIAIJ*) A->data;
5523   if (lvec) *lvec = a->lvec;
5524   if (colmap) *colmap = a->colmap;
5525   if (multScatter) *multScatter = a->Mvctx;
5526   PetscFunctionReturn(0);
5527 }
5528 
5529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5530 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5531 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5532 #if defined(PETSC_HAVE_MKL_SPARSE)
5533 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5534 #endif
5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5536 #if defined(PETSC_HAVE_ELEMENTAL)
5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5538 #endif
5539 #if defined(PETSC_HAVE_HYPRE)
5540 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5541 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5542 #endif
5543 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5544 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5545 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5546 
5547 /*
5548     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5549 
5550                n                       p                          p
5551         (              )       (              )         (                  )
5552       m (      A       )  *  n (       B      )   =   m (         C        )
5553         (              )       (              )         (                  )
5554 
5555 */
5556 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5557 {
5558   PetscErrorCode ierr;
5559   Mat            At,Bt,Ct;
5560 
5561   PetscFunctionBegin;
5562   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5563   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5564   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5565   ierr = MatDestroy(&At);CHKERRQ(ierr);
5566   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5567   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5568   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5569   PetscFunctionReturn(0);
5570 }
5571 
5572 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5573 {
5574   PetscErrorCode ierr;
5575   PetscInt       m=A->rmap->n,n=B->cmap->n;
5576   Mat            Cmat;
5577 
5578   PetscFunctionBegin;
5579   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5580   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5581   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5582   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5583   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5584   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5585   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5586   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5587 
5588   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5589 
5590   *C = Cmat;
5591   PetscFunctionReturn(0);
5592 }
5593 
5594 /* ----------------------------------------------------------------*/
5595 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5596 {
5597   PetscErrorCode ierr;
5598 
5599   PetscFunctionBegin;
5600   if (scall == MAT_INITIAL_MATRIX) {
5601     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5602     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5603     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5604   }
5605   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5606   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5607   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5608   PetscFunctionReturn(0);
5609 }
5610 
5611 /*MC
5612    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5613 
5614    Options Database Keys:
5615 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5616 
5617   Level: beginner
5618 
5619 .seealso: MatCreateAIJ()
5620 M*/
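/*
   A minimal creation sketch for the MATMPIAIJ type documented above (an illustration, not taken from the
   PETSc examples); comm, M and N are placeholders supplied by the caller, and the preallocation numbers
   (5 diagonal and 2 off-diagonal nonzeros per row) are arbitrary:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
*/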
5621 
5622 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5623 {
5624   Mat_MPIAIJ     *b;
5625   PetscErrorCode ierr;
5626   PetscMPIInt    size;
5627 
5628   PetscFunctionBegin;
5629   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5630 
5631   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5632   B->data       = (void*)b;
5633   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5634   B->assembled  = PETSC_FALSE;
5635   B->insertmode = NOT_SET_VALUES;
5636   b->size       = size;
5637 
5638   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5639 
5640   /* build cache for off array entries formed */
5641   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5642 
5643   b->donotstash  = PETSC_FALSE;
5644   b->colmap      = 0;
5645   b->garray      = 0;
5646   b->roworiented = PETSC_TRUE;
5647 
5648   /* stuff used for matrix vector multiply */
5649   b->lvec  = NULL;
5650   b->Mvctx = NULL;
5651 
5652   /* stuff for MatGetRow() */
5653   b->rowindices   = 0;
5654   b->rowvalues    = 0;
5655   b->getrowactive = PETSC_FALSE;
5656 
5657   /* flexible pointer used in CUSP/CUSPARSE classes */
5658   b->spptr = NULL;
5659 
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5670 #if defined(PETSC_HAVE_MKL_SPARSE)
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5672 #endif
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5675 #if defined(PETSC_HAVE_ELEMENTAL)
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5677 #endif
5678 #if defined(PETSC_HAVE_HYPRE)
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5680 #endif
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5686 #if defined(PETSC_HAVE_HYPRE)
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5688 #endif
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5690   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5691   PetscFunctionReturn(0);
5692 }
5693 
5694 /*@C
5695      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5696          and "off-diagonal" parts of the matrix in CSR format.
5697 
5698    Collective on MPI_Comm
5699 
5700    Input Parameters:
5701 +  comm - MPI communicator
5702 .  m - number of local rows (Cannot be PETSC_DECIDE)
5703 .  n - This value should be the same as the local size used in creating the
5704        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5705        it calculated if N is given). For square matrices n is almost always m.
5706 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5707 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5708 .   i - row indices for "diagonal" portion of matrix
5709 .   j - column indices
5710 .   a - matrix values
5711 .   oi - row indices for "off-diagonal" portion of matrix
5712 .   oj - column indices
5713 -   oa - matrix values
5714 
5715    Output Parameter:
5716 .   mat - the matrix
5717 
5718    Level: advanced
5719 
5720    Notes:
5721        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5722        must free the arrays once the matrix has been destroyed and not before.
5723 
5724        The i and j indices are 0 based
5725 
5726        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5727 
5728        This sets local rows and cannot be used to set off-processor values.
5729 
5730        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5731        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5732        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5733        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5734        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5735        communication if it is known that only local entries will be set.
5736 
5737 .keywords: matrix, aij, compressed row, sparse, parallel
5738 
5739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5740           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5741 @*/
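/*
   A minimal usage sketch (an illustration, not taken from the PETSc test suite): every process contributes
   a 2x2 identity block as the "diagonal" part and an empty "off-diagonal" part, producing a distributed
   identity matrix. The dummy oj/oa arrays are never referenced because oi contains no nonzeros; all of the
   arrays must outlive the matrix. j holds local column indices, oj would hold global column indices.

     Mat         A;
     PetscInt    i[]  = {0,1,2},j[]  = {0,1};
     PetscScalar a[]  = {1.0,1.0};
     PetscInt    oi[] = {0,0,0},oj[] = {0};
     PetscScalar oa[] = {0.0};
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/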
5742 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5743 {
5744   PetscErrorCode ierr;
5745   Mat_MPIAIJ     *maij;
5746 
5747   PetscFunctionBegin;
5748   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5749   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5750   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5751   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5752   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5753   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5754   maij = (Mat_MPIAIJ*) (*mat)->data;
5755 
5756   (*mat)->preallocated = PETSC_TRUE;
5757 
5758   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5759   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5760 
5761   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5762   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5763 
5764   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5765   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5766   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5767   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5768 
5769   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5770   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5771   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5772   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5773   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5774   PetscFunctionReturn(0);
5775 }
5776 
5777 /*
5778     Special version for direct calls from Fortran
5779 */
5780 #include <petsc/private/fortranimpl.h>
5781 
5782 /* Change these macros so they can be used in a void function */
5783 #undef CHKERRQ
5784 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5785 #undef SETERRQ2
5786 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5787 #undef SETERRQ3
5788 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5789 #undef SETERRQ
5790 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5791 
5792 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5793 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5794 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5795 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5796 #else
5797 #endif
5798 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5799 {
5800   Mat            mat  = *mmat;
5801   PetscInt       m    = *mm, n = *mn;
5802   InsertMode     addv = *maddv;
5803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5804   PetscScalar    value;
5805   PetscErrorCode ierr;
5806 
5807   MatCheckPreallocated(mat,1);
5808   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5809 
5810 #if defined(PETSC_USE_DEBUG)
5811   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5812 #endif
5813   {
5814     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5815     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5816     PetscBool roworiented = aij->roworiented;
5817 
5818     /* Some Variables required in the macro */
5819     Mat        A                 = aij->A;
5820     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5821     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5822     MatScalar  *aa               = a->a;
5823     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5824     Mat        B                 = aij->B;
5825     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5826     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5827     MatScalar  *ba               = b->a;
5828 
5829     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5830     PetscInt  nonew = a->nonew;
5831     MatScalar *ap1,*ap2;
5832 
5833     PetscFunctionBegin;
5834     for (i=0; i<m; i++) {
5835       if (im[i] < 0) continue;
5836 #if defined(PETSC_USE_DEBUG)
5837       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5838 #endif
5839       if (im[i] >= rstart && im[i] < rend) {
5840         row      = im[i] - rstart;
5841         lastcol1 = -1;
5842         rp1      = aj + ai[row];
5843         ap1      = aa + ai[row];
5844         rmax1    = aimax[row];
5845         nrow1    = ailen[row];
5846         low1     = 0;
5847         high1    = nrow1;
5848         lastcol2 = -1;
5849         rp2      = bj + bi[row];
5850         ap2      = ba + bi[row];
5851         rmax2    = bimax[row];
5852         nrow2    = bilen[row];
5853         low2     = 0;
5854         high2    = nrow2;
5855 
5856         for (j=0; j<n; j++) {
5857           if (roworiented) value = v[i*n+j];
5858           else value = v[i+j*m];
5859           if (in[j] >= cstart && in[j] < cend) {
5860             col = in[j] - cstart;
5861             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5862             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5863           } else if (in[j] < 0) continue;
5864 #if defined(PETSC_USE_DEBUG)
5865           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5866           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5867 #endif
5868           else {
5869             if (mat->was_assembled) {
5870               if (!aij->colmap) {
5871                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5872               }
5873 #if defined(PETSC_USE_CTABLE)
5874               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5875               col--;
5876 #else
5877               col = aij->colmap[in[j]] - 1;
5878 #endif
5879               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5880               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5881                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5882                 col  =  in[j];
5883                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5884                 B     = aij->B;
5885                 b     = (Mat_SeqAIJ*)B->data;
5886                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5887                 rp2   = bj + bi[row];
5888                 ap2   = ba + bi[row];
5889                 rmax2 = bimax[row];
5890                 nrow2 = bilen[row];
5891                 low2  = 0;
5892                 high2 = nrow2;
5893                 bm    = aij->B->rmap->n;
5894                 ba    = b->a;
5895               }
5896             } else col = in[j];
5897             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5898           }
5899         }
5900       } else if (!aij->donotstash) {
5901         if (roworiented) {
5902           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5903         } else {
5904           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5905         }
5906       }
5907     }
5908   }
5909   PetscFunctionReturnVoid();
5910 }
5911