xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 285fb4e2b69b3de46a0633bd0adc6a7f684caa1e)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
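
/*
   Example usage (a minimal sketch; the global sizes M, N and the preallocation values 5 and 2 are
   illustrative placeholders, not values taken from this file):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Calling both preallocation routines, as recommended above, lets the same code run unchanged on a
   single process (where only the MATSEQAIJ call takes effect) and on multiple processes (where only
   the MATMPIAIJ call takes effect).
*/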
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
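
/*
   Example usage (a minimal sketch; M, N and the preallocation values are illustrative placeholders).
   The type is typically selected from the options database, e.g. by running with -mat_type aijcrl:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/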
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125 
126   PetscFunctionBegin;
127   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
128     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
129   } else {
130     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
138   PetscErrorCode ierr;
139   PetscInt       i,rstart,nrows,*rows;
140 
141   PetscFunctionBegin;
142   *zrows = NULL;
143   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
144   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
145   for (i=0; i<nrows; i++) rows[i] += rstart;
146   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
151 {
152   PetscErrorCode ierr;
153   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
154   PetscInt       i,n,*garray = aij->garray;
155   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
156   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
157   PetscReal      *work;
158 
159   PetscFunctionBegin;
160   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
161   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
162   if (type == NORM_2) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
168     }
169   } else if (type == NORM_1) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
175     }
176   } else if (type == NORM_INFINITY) {
177     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
178       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
179     }
180     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
181       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
182     }
183 
184   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
185   if (type == NORM_INFINITY) {
186     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
187   } else {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   }
190   ierr = PetscFree(work);CHKERRQ(ierr);
191   if (type == NORM_2) {
192     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
193   }
194   PetscFunctionReturn(0);
195 }
196 
197 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
198 {
199   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
200   IS              sis,gis;
201   PetscErrorCode  ierr;
202   const PetscInt  *isis,*igis;
203   PetscInt        n,*iis,nsis,ngis,rstart,i;
204 
205   PetscFunctionBegin;
206   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
207   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
208   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
209   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
210   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
211   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
212 
213   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
215   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
216   n    = ngis + nsis;
217   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
218   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
219   for (i=0; i<n; i++) iis[i] += rstart;
220   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
221 
222   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
223   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
224   ierr = ISDestroy(&sis);CHKERRQ(ierr);
225   ierr = ISDestroy(&gis);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 /*
230     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
231     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
232 
233     Only for square matrices
234 
235     Used by a preconditioner, hence PETSC_EXTERN
236 */
237 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
238 {
239   PetscMPIInt    rank,size;
240   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
241   PetscErrorCode ierr;
242   Mat            mat;
243   Mat_SeqAIJ     *gmata;
244   PetscMPIInt    tag;
245   MPI_Status     status;
246   PetscBool      aij;
247   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
248 
249   PetscFunctionBegin;
250   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
251   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
252   if (!rank) {
253     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
254     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
255   }
256   if (reuse == MAT_INITIAL_MATRIX) {
257     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
258     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
259     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
260     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
261     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
262     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
263     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
264     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
265     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
266 
267     rowners[0] = 0;
268     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
269     rstart = rowners[rank];
270     rend   = rowners[rank+1];
271     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
272     if (!rank) {
273       gmata = (Mat_SeqAIJ*) gmat->data;
274       /* send row lengths to all processors */
275       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
276       for (i=1; i<size; i++) {
277         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
278       }
279       /* determine the diagonal and off-diagonal nonzero counts */
280       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
281       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
282       jj   = 0;
283       for (i=0; i<m; i++) {
284         for (j=0; j<dlens[i]; j++) {
285           if (gmata->j[jj] < rstart) ld[i]++;
286           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
287           jj++;
288         }
289       }
290       /* send column indices to other processes */
291       for (i=1; i<size; i++) {
292         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
293         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
295       }
296 
297       /* send numerical values to other processes */
298       for (i=1; i<size; i++) {
299         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
300         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
301       }
302       gmataa = gmata->a;
303       gmataj = gmata->j;
304 
305     } else {
306       /* receive row lengths */
307       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       /* receive column indices */
309       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
311       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       /* determine the diagonal and off-diagonal nonzero counts */
313       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
314       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
315       jj   = 0;
316       for (i=0; i<m; i++) {
317         for (j=0; j<dlens[i]; j++) {
318           if (gmataj[jj] < rstart) ld[i]++;
319           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
320           jj++;
321         }
322       }
323       /* receive numerical values */
324       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
325       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
326     }
327     /* set preallocation */
328     for (i=0; i<m; i++) {
329       dlens[i] -= olens[i];
330     }
331     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
332     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
333 
334     for (i=0; i<m; i++) {
335       dlens[i] += olens[i];
336     }
337     cnt = 0;
338     for (i=0; i<m; i++) {
339       row  = rstart + i;
340       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
341       cnt += dlens[i];
342     }
343     if (rank) {
344       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
345     }
346     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
347     ierr = PetscFree(rowners);CHKERRQ(ierr);
348 
349     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
350 
351     *inmat = mat;
352   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
353     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
354     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
355     mat  = *inmat;
356     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
357     if (!rank) {
358       /* send numerical values to other processes */
359       gmata  = (Mat_SeqAIJ*) gmat->data;
360       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
361       gmataa = gmata->a;
362       for (i=1; i<size; i++) {
363         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
364         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
365       }
366       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
367     } else {
368       /* receive numerical values from process 0*/
369       nz   = Ad->nz + Ao->nz;
370       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
371       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
372     }
373     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
374     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
375     ad = Ad->a;
376     ao = Ao->a;
377     if (mat->rmap->n) {
378       i  = 0;
379       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
380       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
381     }
382     for (i=1; i<mat->rmap->n; i++) {
383       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
384       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
385     }
386     i--;
387     if (mat->rmap->n) {
388       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
389     }
390     if (rank) {
391       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
392     }
393   }
394   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
396   PetscFunctionReturn(0);
397 }
398 
399 /*
400   Local utility routine that creates a mapping from the global column
401 number to the local number in the off-diagonal part of the local
402 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
403 a slightly higher hash table cost; without it, it is not scalable (each processor
404 has an order N integer array) but is fast to access.
405 */
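/*
   Lookup sketch (illustration only; gcol and lcol are hypothetical variable names): once the colmap
   has been built, a global column index gcol is translated to its local index in the off-diagonal
   block B as follows.  Entries are stored shifted by one, so a result of -1 means the column is not
   present on this process.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/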
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
463   { \
464     if (col <= lastcol2) low2 = 0;                        \
465     else high2 = nrow2;                                   \
466     lastcol2 = col;                                       \
467     while (high2-low2 > 5) {                              \
468       t = (low2+high2)/2;                                 \
469       if (rp2[t] > col) high2 = t;                        \
470       else             low2  = t;                         \
471     }                                                     \
472     for (_i=low2; _i<high2; _i++) {                       \
473       if (rp2[_i] > col) break;                           \
474       if (rp2[_i] == col) {                               \
475         if (addv == ADD_VALUES) ap2[_i] += value;         \
476         else                    ap2[_i] = value;          \
477         goto b_noinsert;                                  \
478       }                                                   \
479     }                                                     \
480     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
481     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
482     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
483     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
484     N = nrow2++ - 1; b->nz++; high2++;                    \
485     /* shift up all the later entries in this row */      \
486     for (ii=N; ii>=_i; ii--) {                            \
487       rp2[ii+1] = rp2[ii];                                \
488       ap2[ii+1] = ap2[ii];                                \
489     }                                                     \
490     rp2[_i] = col;                                        \
491     ap2[_i] = value;                                      \
492     B->nonzerostate++;                                    \
493     b_noinsert: ;                                         \
494     bilen[row] = nrow2;                                   \
495   }
496 
497 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
498 {
499   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
500   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
501   PetscErrorCode ierr;
502   PetscInt       l,*garray = mat->garray,diag;
503 
504   PetscFunctionBegin;
505   /* code only works for square matrices A */
506 
507   /* find size of row to the left of the diagonal part */
508   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
509   row  = row - diag;
510   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
511     if (garray[b->j[b->i[row]+l]] > diag) break;
512   }
513   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
514 
515   /* diagonal part */
516   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* right of diagonal part */
519   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
524 {
525   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
526   PetscScalar    value;
527   PetscErrorCode ierr;
528   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
529   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
530   PetscBool      roworiented = aij->roworiented;
531 
532   /* Some Variables required in the macro */
533   Mat        A                 = aij->A;
534   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
535   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
536   MatScalar  *aa               = a->a;
537   PetscBool  ignorezeroentries = a->ignorezeroentries;
538   Mat        B                 = aij->B;
539   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
540   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
541   MatScalar  *ba               = b->a;
542 
543   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
544   PetscInt  nonew;
545   MatScalar *ap1,*ap2;
546 
547   PetscFunctionBegin;
548   for (i=0; i<m; i++) {
549     if (im[i] < 0) continue;
550 #if defined(PETSC_USE_DEBUG)
551     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
552 #endif
553     if (im[i] >= rstart && im[i] < rend) {
554       row      = im[i] - rstart;
555       lastcol1 = -1;
556       rp1      = aj + ai[row];
557       ap1      = aa + ai[row];
558       rmax1    = aimax[row];
559       nrow1    = ailen[row];
560       low1     = 0;
561       high1    = nrow1;
562       lastcol2 = -1;
563       rp2      = bj + bi[row];
564       ap2      = ba + bi[row];
565       rmax2    = bimax[row];
566       nrow2    = bilen[row];
567       low2     = 0;
568       high2    = nrow2;
569 
570       for (j=0; j<n; j++) {
571         if (roworiented) value = v[i*n+j];
572         else             value = v[i+j*m];
573         if (in[j] >= cstart && in[j] < cend) {
574           col   = in[j] - cstart;
575           nonew = a->nonew;
576           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
577           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
578         } else if (in[j] < 0) continue;
579 #if defined(PETSC_USE_DEBUG)
580         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
581 #endif
582         else {
583           if (mat->was_assembled) {
584             if (!aij->colmap) {
585               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
586             }
587 #if defined(PETSC_USE_CTABLE)
588             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
589             col--;
590 #else
591             col = aij->colmap[in[j]] - 1;
592 #endif
593             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
594               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
595               col  =  in[j];
596               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
597               B     = aij->B;
598               b     = (Mat_SeqAIJ*)B->data;
599               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
600               rp2   = bj + bi[row];
601               ap2   = ba + bi[row];
602               rmax2 = bimax[row];
603               nrow2 = bilen[row];
604               low2  = 0;
605               high2 = nrow2;
606               bm    = aij->B->rmap->n;
607               ba    = b->a;
608             } else if (col < 0) {
609               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
610                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
611               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
612             }
613           } else col = in[j];
614           nonew = b->nonew;
615           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
616         }
617       }
618     } else {
619       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
620       if (!aij->donotstash) {
621         mat->assembled = PETSC_FALSE;
622         if (roworiented) {
623           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
624         } else {
625           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         }
627       }
628     }
629   }
630   PetscFunctionReturn(0);
631 }
632 
633 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   PetscErrorCode ierr;
637   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
638   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
639 
640   PetscFunctionBegin;
641   for (i=0; i<m; i++) {
642     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
643     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
644     if (idxm[i] >= rstart && idxm[i] < rend) {
645       row = idxm[i] - rstart;
646       for (j=0; j<n; j++) {
647         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
648         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
649         if (idxn[j] >= cstart && idxn[j] < cend) {
650           col  = idxn[j] - cstart;
651           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
652         } else {
653           if (!aij->colmap) {
654             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
655           }
656 #if defined(PETSC_USE_CTABLE)
657           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
658           col--;
659 #else
660           col = aij->colmap[idxn[j]] - 1;
661 #endif
662           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
663           else {
664             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
665           }
666         }
667       }
668     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
669   }
670   PetscFunctionReturn(0);
671 }
672 
673 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
674 
675 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
676 {
677   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
678   PetscErrorCode ierr;
679   PetscInt       nstash,reallocs;
680 
681   PetscFunctionBegin;
682   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
683 
684   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
685   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
686   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
691 {
692   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
693   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
694   PetscErrorCode ierr;
695   PetscMPIInt    n;
696   PetscInt       i,j,rstart,ncols,flg;
697   PetscInt       *row,*col;
698   PetscBool      other_disassembled;
699   PetscScalar    *val;
700 
701   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
702 
703   PetscFunctionBegin;
704   if (!aij->donotstash && !mat->nooffprocentries) {
705     while (1) {
706       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
707       if (!flg) break;
708 
709       for (i=0; i<n; ) {
710         /* Now identify the consecutive vals belonging to the same row */
711         for (j=i,rstart=row[j]; j<n; j++) {
712           if (row[j] != rstart) break;
713         }
714         if (j < n) ncols = j-i;
715         else       ncols = n-i;
716         /* Now assemble all these values with a single function call */
717         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
718 
719         i = j;
720       }
721     }
722     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
723   }
724   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
725   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
726 
727   /* determine if any processor has disassembled, if so we must
728      also disassemble ourselves, in order that we may reassemble. */
729   /*
730      if nonzero structure of submatrix B cannot change then we know that
731      no processor disassembled thus we can skip this stuff
732   */
733   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
734     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
735     if (mat->was_assembled && !other_disassembled) {
736       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
737     }
738   }
739   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
740     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
741   }
742   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
743   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
744   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
745 
746   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
747 
748   aij->rowvalues = 0;
749 
750   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
751   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
752 
753   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
754   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
755     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
756     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
757   }
758   PetscFunctionReturn(0);
759 }
760 
761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
762 {
763   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
768   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
773 {
774   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
775   PetscInt      *lrows;
776   PetscInt       r, len;
777   PetscErrorCode ierr;
778 
779   PetscFunctionBegin;
780   /* get locally owned rows */
781   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
782   /* fix right hand side if needed */
783   if (x && b) {
784     const PetscScalar *xx;
785     PetscScalar       *bb;
786 
787     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
788     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
789     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
790     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
792   }
793   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
794   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
796     PetscBool cong;
797     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
798     if (cong) A->congruentlayouts = 1;
799     else      A->congruentlayouts = 0;
800   }
801   if ((diag != 0.0) && A->congruentlayouts) {
802     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
803   } else if (diag != 0.0) {
804     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
805     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
806     for (r = 0; r < len; ++r) {
807       const PetscInt row = lrows[r] + A->rmap->rstart;
808       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
809     }
810     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
811     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
812   } else {
813     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
814   }
815   ierr = PetscFree(lrows);CHKERRQ(ierr);
816 
817   /* only change matrix nonzero state if pattern was allowed to be changed */
818   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
819     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
820     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
826 {
827   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
828   PetscErrorCode    ierr;
829   PetscMPIInt       n = A->rmap->n;
830   PetscInt          i,j,r,m,p = 0,len = 0;
831   PetscInt          *lrows,*owners = A->rmap->range;
832   PetscSFNode       *rrows;
833   PetscSF           sf;
834   const PetscScalar *xx;
835   PetscScalar       *bb,*mask;
836   Vec               xmask,lmask;
837   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
838   const PetscInt    *aj, *ii,*ridx;
839   PetscScalar       *aa;
840 
841   PetscFunctionBegin;
842   /* Create SF where leaves are input rows and roots are owned rows */
843   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
844   for (r = 0; r < n; ++r) lrows[r] = -1;
845   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
846   for (r = 0; r < N; ++r) {
847     const PetscInt idx   = rows[r];
848     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
849     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
850       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
851     }
852     rrows[r].rank  = p;
853     rrows[r].index = rows[r] - owners[p];
854   }
855   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
856   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
857   /* Collect flags for rows to be zeroed */
858   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
859   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
860   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
861   /* Compress and put in row numbers */
862   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
863   /* zero diagonal part of matrix */
864   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
865   /* handle off diagonal part of matrix */
866   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
867   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
868   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
869   for (i=0; i<len; i++) bb[lrows[i]] = 1;
870   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
871   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
874   if (x) {
875     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
876     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
878     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
879   }
880   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
881   /* remove zeroed rows of off diagonal matrix */
882   ii = aij->i;
883   for (i=0; i<len; i++) {
884     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
885   }
886   /* loop over all elements of off process part of matrix zeroing removed columns*/
887   if (aij->compressedrow.use) {
888     m    = aij->compressedrow.nrows;
889     ii   = aij->compressedrow.i;
890     ridx = aij->compressedrow.rindex;
891     for (i=0; i<m; i++) {
892       n  = ii[i+1] - ii[i];
893       aj = aij->j + ii[i];
894       aa = aij->a + ii[i];
895 
896       for (j=0; j<n; j++) {
897         if (PetscAbsScalar(mask[*aj])) {
898           if (b) bb[*ridx] -= *aa*xx[*aj];
899           *aa = 0.0;
900         }
901         aa++;
902         aj++;
903       }
904       ridx++;
905     }
906   } else { /* do not use compressed row format */
907     m = l->B->rmap->n;
908     for (i=0; i<m; i++) {
909       n  = ii[i+1] - ii[i];
910       aj = aij->j + ii[i];
911       aa = aij->a + ii[i];
912       for (j=0; j<n; j++) {
913         if (PetscAbsScalar(mask[*aj])) {
914           if (b) bb[i] -= *aa*xx[*aj];
915           *aa = 0.0;
916         }
917         aa++;
918         aj++;
919       }
920     }
921   }
922   if (x) {
923     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
924     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
925   }
926   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
927   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
928   ierr = PetscFree(lrows);CHKERRQ(ierr);
929 
930   /* only change matrix nonzero state if pattern was allowed to be changed */
931   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
932     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
933     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
934   }
935   PetscFunctionReturn(0);
936 }
937 
938 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
939 {
940   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
941   PetscErrorCode ierr;
942   PetscInt       nt;
943   VecScatter     Mvctx = a->Mvctx;
944 
945   PetscFunctionBegin;
946   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
947   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
948 
949   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
951   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
957 {
958   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscErrorCode ierr;
970   VecScatter     Mvctx = a->Mvctx;
971 
972   PetscFunctionBegin;
973   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
974   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
976   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
977   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
982 {
983   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode ierr;
985   PetscBool      merged;
986 
987   PetscFunctionBegin;
988   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
989   /* do nondiagonal part */
990   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
991   if (!merged) {
992     /* send it on its way */
993     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
994     /* do local part */
995     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
996     /* receive remote parts: note this assumes the values are not actually */
997     /* added in yy until the next line. */
998     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999   } else {
1000     /* do local part */
1001     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1002     /* send it on its way */
1003     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1004     /* values actually were received in the Begin() but we need to call this nop */
1005     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1011 {
1012   MPI_Comm       comm;
1013   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1014   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1015   IS             Me,Notme;
1016   PetscErrorCode ierr;
1017   PetscInt       M,N,first,last,*notme,i;
1018   PetscMPIInt    size;
1019 
1020   PetscFunctionBegin;
1021   /* Easy test: symmetric diagonal block */
1022   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1023   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1024   if (!*f) PetscFunctionReturn(0);
1025   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1026   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1027   if (size == 1) PetscFunctionReturn(0);
1028 
1029   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1030   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1031   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1032   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1033   for (i=0; i<first; i++) notme[i] = i;
1034   for (i=last; i<M; i++) notme[i-last+first] = i;
1035   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1036   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1037   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1038   Aoff = Aoffs[0];
1039   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1040   Boff = Boffs[0];
1041   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1042   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1043   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1044   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1045   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1046   ierr = PetscFree(notme);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1051 {
1052   PetscErrorCode ierr;
1053 
1054   PetscFunctionBegin;
1055   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1060 {
1061   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1062   PetscErrorCode ierr;
1063 
1064   PetscFunctionBegin;
1065   /* do nondiagonal part */
1066   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1067   /* send it on its way */
1068   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1069   /* do local part */
1070   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1071   /* receive remote parts */
1072   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 /*
1077   This only works correctly for square matrices where the subblock A->A is the
1078    diagonal block
1079 */
1080 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1081 {
1082   PetscErrorCode ierr;
1083   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1084 
1085   PetscFunctionBegin;
1086   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1087   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1088   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1093 {
1094   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1099   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1104 {
1105   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1106   PetscErrorCode ierr;
1107 
1108   PetscFunctionBegin;
1109 #if defined(PETSC_USE_LOG)
1110   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1111 #endif
1112   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1113   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1114   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1115   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1116 #if defined(PETSC_USE_CTABLE)
1117   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1118 #else
1119   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1120 #endif
1121   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1122   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1123   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1124   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1125   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1126   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1127   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1128 
1129   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1138 #if defined(PETSC_HAVE_ELEMENTAL)
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1140 #endif
1141 #if defined(PETSC_HAVE_HYPRE)
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1144 #endif
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1151 {
1152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1153   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1154   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1155   PetscErrorCode ierr;
1156   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1157   int            fd;
1158   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1159   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1160   PetscScalar    *column_values;
1161   PetscInt       message_count,flowcontrolcount;
1162   FILE           *file;
1163 
1164   PetscFunctionBegin;
1165   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1166   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1167   nz   = A->nz + B->nz;
1168   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1169   if (!rank) {
1170     header[0] = MAT_FILE_CLASSID;
1171     header[1] = mat->rmap->N;
1172     header[2] = mat->cmap->N;
1173 
1174     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     /* get largest number of rows any processor has */
1177     rlen  = mat->rmap->n;
1178     range = mat->rmap->range;
1179     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1180   } else {
1181     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1182     rlen = mat->rmap->n;
1183   }
1184 
1185   /* load up the local row counts */
1186   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1187   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1188 
1189   /* store the row lengths to the file */
1190   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1191   if (!rank) {
1192     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1193     for (i=1; i<size; i++) {
1194       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1195       rlen = range[i+1] - range[i];
1196       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1198     }
1199     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1200   } else {
1201     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1202     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1204   }
1205   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1206 
1207   /* load up the local column indices */
1208   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1209   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1211   cnt   = 0;
1212   for (i=0; i<mat->rmap->n; i++) {
1213     for (j=B->i[i]; j<B->i[i+1]; j++) {
1214       if ((col = garray[B->j[j]]) > cstart) break;
1215       column_indices[cnt++] = col;
1216     }
1217     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1218     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1219   }
1220   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1221 
1222   /* store the column indices to the file */
1223   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1224   if (!rank) {
1225     MPI_Status status;
1226     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1227     for (i=1; i<size; i++) {
1228       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1229       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1230       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
1231       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     }
1234     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1237     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1238     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1240   }
1241   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1242 
1243   /* load up the local column values */
1244   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1245   cnt  = 0;
1246   for (i=0; i<mat->rmap->n; i++) {
1247     for (j=B->i[i]; j<B->i[i+1]; j++) {
1248       if (garray[B->j[j]] > cstart) break;
1249       column_values[cnt++] = B->a[j];
1250     }
1251     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1252     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1253   }
1254   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1255 
1256   /* store the column values to the file */
1257   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1258   if (!rank) {
1259     MPI_Status status;
1260     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1261     for (i=1; i<size; i++) {
1262       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1263       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1264       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1265       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1266       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1267     }
1268     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1269   } else {
1270     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1271     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1272     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1273     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1274   }
1275   ierr = PetscFree(column_values);CHKERRQ(ierr);
1276 
1277   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1278   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #include <petscdraw.h>
1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank = aij->rank,size = aij->size;
1288   PetscBool         isdraw,iascii,isbinary;
1289   PetscViewer       sviewer;
1290   PetscViewerFormat format;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1294   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1295   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1296   if (iascii) {
1297     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1298     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1299       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1300       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1301       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302       for (i=0; i<(PetscInt)size; i++) {
1303         nmax = PetscMax(nmax,nz[i]);
1304         nmin = PetscMin(nmin,nz[i]);
1305         navg += nz[i];
1306       }
1307       ierr = PetscFree(nz);CHKERRQ(ierr);
1308       navg = navg/size;
1309       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1310       PetscFunctionReturn(0);
1311     }
1312     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1313     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1314       MatInfo   info;
1315       PetscBool inodes;
1316 
1317       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1318       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1319       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1320       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1321       if (!inodes) {
1322         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1323                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1324       } else {
1325         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1326                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1327       }
1328       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1330       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1332       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1334       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1335       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1336       PetscFunctionReturn(0);
1337     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1338       PetscInt inodecount,inodelimit,*inodes;
1339       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1340       if (inodes) {
1341         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1342       } else {
1343         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1344       }
1345       PetscFunctionReturn(0);
1346     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1347       PetscFunctionReturn(0);
1348     }
1349   } else if (isbinary) {
1350     if (size == 1) {
1351       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1352       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1353     } else {
1354       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1355     }
1356     PetscFunctionReturn(0);
1357   } else if (isdraw) {
1358     PetscDraw draw;
1359     PetscBool isnull;
1360     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1361     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1362     if (isnull) PetscFunctionReturn(0);
1363   }
1364 
1365   {
1366     /* assemble the entire matrix onto the first process */
1367     Mat        A;
1368     Mat_SeqAIJ *Aloc;
1369     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1370     MatScalar  *a;
1371 
1372     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1373     if (!rank) {
1374       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1375     } else {
1376       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1377     }
1378     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1379     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1380     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1381     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1382     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1383 
1384     /* copy over the A part */
1385     Aloc = (Mat_SeqAIJ*)aij->A->data;
1386     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1387     row  = mat->rmap->rstart;
1388     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1389     for (i=0; i<m; i++) {
1390       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1391       row++;
1392       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1393     }
1394     aj = Aloc->j;
1395     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1396 
1397     /* copy over the B part */
1398     Aloc = (Mat_SeqAIJ*)aij->B->data;
1399     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1400     row  = mat->rmap->rstart;
1401     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1402     ct   = cols;
1403     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1404     for (i=0; i<m; i++) {
1405       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1406       row++;
1407       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1408     }
1409     ierr = PetscFree(ct);CHKERRQ(ierr);
1410     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412     /*
1413        Every process has to participate in the viewing call since the graphics waits are
1414        synchronized across all processes that share the PetscDraw object
1415     */
1416     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1417     if (!rank) {
1418       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1419       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1420     }
1421     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1422     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1423     ierr = MatDestroy(&A);CHKERRQ(ierr);
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1429 {
1430   PetscErrorCode ierr;
1431   PetscBool      iascii,isdraw,issocket,isbinary;
1432 
1433   PetscFunctionBegin;
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1435   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1437   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1438   if (iascii || isdraw || isbinary || issocket) {
1439     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1440   }
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1445 {
1446   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1447   PetscErrorCode ierr;
1448   Vec            bb1 = 0;
1449   PetscBool      hasop;
1450 
1451   PetscFunctionBegin;
1452   if (flag == SOR_APPLY_UPPER) {
1453     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1454     PetscFunctionReturn(0);
1455   }
1456 
1457   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* a work vector is needed for multiple iterations, a nonzero initial guess, or Eisenstat */
1458     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1459   }
1460 
1461   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1462     if (flag & SOR_ZERO_INITIAL_GUESS) {
1463       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1464       its--;
1465     }
1466 
1467     while (its--) {
1468       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1469       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1470 
1471       /* update rhs: bb1 = bb - B*x */
1472       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1473       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1474 
1475       /* local sweep */
1476       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1477     }
1478   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1479     if (flag & SOR_ZERO_INITIAL_GUESS) {
1480       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481       its--;
1482     }
1483     while (its--) {
1484       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1485       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486 
1487       /* update rhs: bb1 = bb - B*x */
1488       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1489       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1490 
1491       /* local sweep */
1492       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1493     }
1494   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1495     if (flag & SOR_ZERO_INITIAL_GUESS) {
1496       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1497       its--;
1498     }
1499     while (its--) {
1500       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1501       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502 
1503       /* update rhs: bb1 = bb - B*x */
1504       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1505       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1506 
1507       /* local sweep */
1508       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1509     }
1510   } else if (flag & SOR_EISENSTAT) {
1511     Vec xx1;
1512 
1513     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1514     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1515 
1516     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1517     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518     if (!mat->diag) {
1519       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1520       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1521     }
1522     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1523     if (hasop) {
1524       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1525     } else {
1526       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1527     }
1528     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1529 
1530     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1531 
1532     /* local sweep */
1533     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1534     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1535     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1536   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1537 
1538   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1539 
1540   matin->factorerrortype = mat->A->factorerrortype;
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1545 {
1546   Mat            aA,aB,Aperm;
1547   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1548   PetscScalar    *aa,*ba;
1549   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1550   PetscSF        rowsf,sf;
1551   IS             parcolp = NULL;
1552   PetscBool      done;
1553   PetscErrorCode ierr;
1554 
1555   PetscFunctionBegin;
1556   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1557   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1558   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1559   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1560 
1561   /* Invert row permutation to find out where my rows should go */
1562   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1563   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1564   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1565   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1566   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1567   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1568 
1569   /* Invert column permutation to find out where my columns should go */
1570   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1571   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1572   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1573   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1574   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1575   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1576   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1577 
1578   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1579   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1580   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1581 
1582   /* Find out where my gcols should go */
1583   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1584   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1585   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1586   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1587   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1588   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1589   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1590   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1591 
1592   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1593   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1594   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1595   for (i=0; i<m; i++) {
1596     PetscInt row = rdest[i],rowner;
1597     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1598     for (j=ai[i]; j<ai[i+1]; j++) {
1599       PetscInt cowner,col = cdest[aj[j]];
1600       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1601       if (rowner == cowner) dnnz[i]++;
1602       else onnz[i]++;
1603     }
1604     for (j=bi[i]; j<bi[i+1]; j++) {
1605       PetscInt cowner,col = gcdest[bj[j]];
1606       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1607       if (rowner == cowner) dnnz[i]++;
1608       else onnz[i]++;
1609     }
1610   }
1611   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1612   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1613   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1614   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1615   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1616 
1617   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1618   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1619   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1620   for (i=0; i<m; i++) {
1621     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1622     PetscInt j0,rowlen;
1623     rowlen = ai[i+1] - ai[i];
1624     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the length m of the work arrays, so insert the values in batches */
1625       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1626       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1627     }
1628     rowlen = bi[i+1] - bi[i];
1629     for (j0=j=0; j<rowlen; j0=j) {
1630       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1631       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1632     }
1633   }
1634   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1635   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1636   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1637   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1638   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1639   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1640   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1641   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1642   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1643   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1644   *B = Aperm;
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1649 {
1650   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1651   PetscErrorCode ierr;
1652 
1653   PetscFunctionBegin;
1654   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1655   if (ghosts) *ghosts = aij->garray;
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1660 {
1661   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1662   Mat            A    = mat->A,B = mat->B;
1663   PetscErrorCode ierr;
1664   PetscReal      isend[5],irecv[5];
1665 
1666   PetscFunctionBegin;
1667   info->block_size = 1.0;
1668   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1669 
1670   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1671   isend[3] = info->memory;  isend[4] = info->mallocs;
1672 
1673   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1674 
1675   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1676   isend[3] += info->memory;  isend[4] += info->mallocs;
1677   if (flag == MAT_LOCAL) {
1678     info->nz_used      = isend[0];
1679     info->nz_allocated = isend[1];
1680     info->nz_unneeded  = isend[2];
1681     info->memory       = isend[3];
1682     info->mallocs      = isend[4];
1683   } else if (flag == MAT_GLOBAL_MAX) {
1684     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1685 
1686     info->nz_used      = irecv[0];
1687     info->nz_allocated = irecv[1];
1688     info->nz_unneeded  = irecv[2];
1689     info->memory       = irecv[3];
1690     info->mallocs      = irecv[4];
1691   } else if (flag == MAT_GLOBAL_SUM) {
1692     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1693 
1694     info->nz_used      = irecv[0];
1695     info->nz_allocated = irecv[1];
1696     info->nz_unneeded  = irecv[2];
1697     info->memory       = irecv[3];
1698     info->mallocs      = irecv[4];
1699   }
1700   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1701   info->fill_ratio_needed = 0;
1702   info->factor_mallocs    = 0;
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1707 {
1708   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1709   PetscErrorCode ierr;
1710 
1711   PetscFunctionBegin;
1712   switch (op) {
1713   case MAT_NEW_NONZERO_LOCATIONS:
1714   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1715   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1716   case MAT_KEEP_NONZERO_PATTERN:
1717   case MAT_NEW_NONZERO_LOCATION_ERR:
1718   case MAT_USE_INODES:
1719   case MAT_IGNORE_ZERO_ENTRIES:
1720     MatCheckPreallocated(A,1);
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_ROW_ORIENTED:
1725     MatCheckPreallocated(A,1);
1726     a->roworiented = flg;
1727 
1728     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1729     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1730     break;
1731   case MAT_NEW_DIAGONALS:
1732     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1733     break;
1734   case MAT_IGNORE_OFF_PROC_ENTRIES:
1735     a->donotstash = flg;
1736     break;
1737   case MAT_SPD:
1738     A->spd_set = PETSC_TRUE;
1739     A->spd     = flg;
1740     if (flg) {
1741       A->symmetric                  = PETSC_TRUE;
1742       A->structurally_symmetric     = PETSC_TRUE;
1743       A->symmetric_set              = PETSC_TRUE;
1744       A->structurally_symmetric_set = PETSC_TRUE;
1745     }
1746     break;
1747   case MAT_SYMMETRIC:
1748     MatCheckPreallocated(A,1);
1749     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_STRUCTURALLY_SYMMETRIC:
1752     MatCheckPreallocated(A,1);
1753     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1754     break;
1755   case MAT_HERMITIAN:
1756     MatCheckPreallocated(A,1);
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_SYMMETRY_ETERNAL:
1760     MatCheckPreallocated(A,1);
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762     break;
1763   case MAT_SUBMAT_SINGLEIS:
1764     A->submat_singleis = flg;
1765     break;
1766   case MAT_STRUCTURE_ONLY:
1767     /* The option is handled directly by MatSetOption() */
1768     break;
1769   default:
1770     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1771   }
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1776 {
1777   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1778   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1779   PetscErrorCode ierr;
1780   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1781   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1782   PetscInt       *cmap,*idx_p;
1783 
1784   PetscFunctionBegin;
1785   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1786   mat->getrowactive = PETSC_TRUE;
1787 
1788   if (!mat->rowvalues && (idx || v)) {
1789     /*
1790         allocate enough space to hold information from the longest row.
1791     */
1792     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1793     PetscInt   max = 1,tmp;
1794     for (i=0; i<matin->rmap->n; i++) {
1795       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1796       if (max < tmp) max = tmp;
1797     }
1798     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1799   }
1800 
1801   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1802   lrow = row - rstart;
1803 
1804   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1805   if (!v)   {pvA = 0; pvB = 0;}
1806   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1807   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1808   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1809   nztot = nzA + nzB;
1810 
1811   cmap = mat->garray;
1812   if (v  || idx) {
1813     if (nztot) {
1814       /* Sort by increasing column numbers, assuming A and B already sorted */
1815       PetscInt imark = -1;
1816       if (v) {
1817         *v = v_p = mat->rowvalues;
1818         for (i=0; i<nzB; i++) {
1819           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1820           else break;
1821         }
1822         imark = i;
1823         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1824         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1825       }
1826       if (idx) {
1827         *idx = idx_p = mat->rowindices;
1828         if (imark > -1) {
1829           for (i=0; i<imark; i++) {
1830             idx_p[i] = cmap[cworkB[i]];
1831           }
1832         } else {
1833           for (i=0; i<nzB; i++) {
1834             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1835             else break;
1836           }
1837           imark = i;
1838         }
1839         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1840         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1841       }
1842     } else {
1843       if (idx) *idx = 0;
1844       if (v)   *v   = 0;
1845     }
1846   }
1847   *nz  = nztot;
1848   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1849   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1854 {
1855   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1856 
1857   PetscFunctionBegin;
1858   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1859   aij->getrowactive = PETSC_FALSE;
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1864 {
1865   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1866   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1867   PetscErrorCode ierr;
1868   PetscInt       i,j,cstart = mat->cmap->rstart;
1869   PetscReal      sum = 0.0;
1870   MatScalar      *v;
1871 
1872   PetscFunctionBegin;
1873   if (aij->size == 1) {
1874     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1875   } else {
1876     if (type == NORM_FROBENIUS) {
1877       v = amat->a;
1878       for (i=0; i<amat->nz; i++) {
1879         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1880       }
1881       v = bmat->a;
1882       for (i=0; i<bmat->nz; i++) {
1883         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1884       }
1885       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1886       *norm = PetscSqrtReal(*norm);
1887       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1888     } else if (type == NORM_1) { /* max column norm */
1889       PetscReal *tmp,*tmp2;
1890       PetscInt  *jj,*garray = aij->garray;
1891       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1892       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1893       *norm = 0.0;
1894       v     = amat->a; jj = amat->j;
1895       for (j=0; j<amat->nz; j++) {
1896         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1897       }
1898       v = bmat->a; jj = bmat->j;
1899       for (j=0; j<bmat->nz; j++) {
1900         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1901       }
1902       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1903       for (j=0; j<mat->cmap->N; j++) {
1904         if (tmp2[j] > *norm) *norm = tmp2[j];
1905       }
1906       ierr = PetscFree(tmp);CHKERRQ(ierr);
1907       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1908       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1909     } else if (type == NORM_INFINITY) { /* max row norm */
1910       PetscReal ntemp = 0.0;
1911       for (j=0; j<aij->A->rmap->n; j++) {
1912         v   = amat->a + amat->i[j];
1913         sum = 0.0;
1914         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1915           sum += PetscAbsScalar(*v); v++;
1916         }
1917         v = bmat->a + bmat->i[j];
1918         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1919           sum += PetscAbsScalar(*v); v++;
1920         }
1921         if (sum > ntemp) ntemp = sum;
1922       }
1923       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1924       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1925     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for the two norm");
1926   }
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1931 {
1932   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1933   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1934   PetscErrorCode ierr;
1935   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1936   PetscInt       cstart = A->cmap->rstart,ncol;
1937   Mat            B;
1938   MatScalar      *array;
1939 
1940   PetscFunctionBegin;
1941   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1942   ai = Aloc->i; aj = Aloc->j;
1943   bi = Bloc->i; bj = Bloc->j;
1944   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1945     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1946     PetscSFNode          *oloc;
1947     PETSC_UNUSED PetscSF sf;
1948 
1949     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1950     /* compute d_nnz for preallocation */
1951     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1952     for (i=0; i<ai[ma]; i++) {
1953       d_nnz[aj[i]]++;
1954       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1955     }
1956     /* compute local off-diagonal contributions */
1957     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1958     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1959     /* map those to global */
1960     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1961     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1962     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1963     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1964     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1965     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1966     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1967 
1968     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1969     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1970     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1971     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1972     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1973     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1974   } else {
1975     B    = *matout;
1976     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1977     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1978   }
1979 
1980   /* copy over the A part */
1981   array = Aloc->a;
1982   row   = A->rmap->rstart;
1983   for (i=0; i<ma; i++) {
1984     ncol = ai[i+1]-ai[i];
1985     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1986     row++;
1987     array += ncol; aj += ncol;
1988   }
1989   aj = Aloc->j;
1990   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1991 
1992   /* copy over the B part */
1993   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1994   array = Bloc->a;
1995   row   = A->rmap->rstart;
1996   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1997   cols_tmp = cols;
1998   for (i=0; i<mb; i++) {
1999     ncol = bi[i+1]-bi[i];
2000     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2001     row++;
2002     array += ncol; cols_tmp += ncol;
2003   }
2004   ierr = PetscFree(cols);CHKERRQ(ierr);
2005 
2006   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2007   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2008   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2009     *matout = B;
2010   } else {
2011     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2017 {
2018   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2019   Mat            a    = aij->A,b = aij->B;
2020   PetscErrorCode ierr;
2021   PetscInt       s1,s2,s3;
2022 
2023   PetscFunctionBegin;
2024   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2025   if (rr) {
2026     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2027     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2028     /* Overlap communication with computation. */
2029     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2030   }
2031   if (ll) {
2032     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2033     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2034     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2035   }
2036   /* scale the diagonal block */
2037   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2038 
2039   if (rr) {
2040     /* finish the scatter and then right-scale the off-diagonal block */
2041     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2042     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2043   }
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2048 {
2049   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2050   PetscErrorCode ierr;
2051 
2052   PetscFunctionBegin;
2053   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2058 {
2059   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2060   Mat            a,b,c,d;
2061   PetscBool      flg;
2062   PetscErrorCode ierr;
2063 
2064   PetscFunctionBegin;
2065   a = matA->A; b = matA->B;
2066   c = matB->A; d = matB->B;
2067 
2068   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2069   if (flg) {
2070     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2071   }
2072   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2077 {
2078   PetscErrorCode ierr;
2079   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2080   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2081 
2082   PetscFunctionBegin;
2083   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2084   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2085     /* because of the column compression in the off-process part of the matrix a->B,
2086        the number of columns in a->B and b->B may be different, hence we cannot call
2087        MatCopy() directly on the two parts. If need be, a copy more efficient than
2088        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2089        then copying the submatrices */
2090     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2091   } else {
2092     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2093     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2094   }
2095   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2100 {
2101   PetscErrorCode ierr;
2102 
2103   PetscFunctionBegin;
2104   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 /*
2109    Computes the number of nonzeros per row needed for preallocation when X and Y
2110    have different nonzero structure.
2111 */
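/*
   A small worked example (illustrative values): if row i of X has global columns {1,4,7} and
   row i of Y has global columns {2,4,9}, the merge below counts the union {1,2,4,7,9}, so
   nnz[i] = 5; the shared column 4 is counted only once.
*/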
2112 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2113 {
2114   PetscInt       i,j,k,nzx,nzy;
2115 
2116   PetscFunctionBegin;
2117   /* Set the number of nonzeros in the new matrix */
2118   for (i=0; i<m; i++) {
2119     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2120     nzx = xi[i+1] - xi[i];
2121     nzy = yi[i+1] - yi[i];
2122     nnz[i] = 0;
2123     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2124       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2125       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2126       nnz[i]++;
2127     }
2128     for (; k<nzy; k++) nnz[i]++;
2129   }
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2134 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2135 {
2136   PetscErrorCode ierr;
2137   PetscInt       m = Y->rmap->N;
2138   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2139   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2140 
2141   PetscFunctionBegin;
2142   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2147 {
2148   PetscErrorCode ierr;
2149   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2150   PetscBLASInt   bnz,one=1;
2151   Mat_SeqAIJ     *x,*y;
2152 
2153   PetscFunctionBegin;
2154   if (str == SAME_NONZERO_PATTERN) {
2155     PetscScalar alpha = a;
2156     x    = (Mat_SeqAIJ*)xx->A->data;
2157     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2158     y    = (Mat_SeqAIJ*)yy->A->data;
2159     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2160     x    = (Mat_SeqAIJ*)xx->B->data;
2161     y    = (Mat_SeqAIJ*)yy->B->data;
2162     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2163     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2164     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2165   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2166     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2167   } else {
2168     Mat      B;
2169     PetscInt *nnz_d,*nnz_o;
2170     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2171     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2172     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2173     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2174     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2175     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2176     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2177     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2178     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2179     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2180     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2181     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2182     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2183     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2184   }
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2189 
2190 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2191 {
2192 #if defined(PETSC_USE_COMPLEX)
2193   PetscErrorCode ierr;
2194   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2195 
2196   PetscFunctionBegin;
2197   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2198   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2199 #else
2200   PetscFunctionBegin;
2201 #endif
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2206 {
2207   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2208   PetscErrorCode ierr;
2209 
2210   PetscFunctionBegin;
2211   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2212   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2213   PetscFunctionReturn(0);
2214 }
2215 
2216 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2217 {
2218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2223   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2228 {
2229   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2230   PetscErrorCode ierr;
2231   PetscInt       i,*idxb = 0;
2232   PetscScalar    *va,*vb;
2233   Vec            vtmp;
2234 
2235   PetscFunctionBegin;
2236   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2237   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2238   if (idx) {
2239     for (i=0; i<A->rmap->n; i++) {
2240       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2241     }
2242   }
2243 
2244   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2245   if (idx) {
2246     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2247   }
2248   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2249   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2250 
2251   for (i=0; i<A->rmap->n; i++) {
2252     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2253       va[i] = vb[i];
2254       if (idx) idx[i] = a->garray[idxb[i]];
2255     }
2256   }
2257 
2258   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2259   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2260   ierr = PetscFree(idxb);CHKERRQ(ierr);
2261   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2266 {
2267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269   PetscInt       i,*idxb = 0;
2270   PetscScalar    *va,*vb;
2271   Vec            vtmp;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2275   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2276   if (idx) {
2277     for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, as in MatGetRowMaxAbs_MPIAIJ() */
2278       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2279     }
2280   }
2281 
2282   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2283   if (idx) {
2284     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2285   }
2286   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2287   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2288 
2289   for (i=0; i<A->rmap->n; i++) {
2290     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2291       va[i] = vb[i];
2292       if (idx) idx[i] = a->garray[idxb[i]];
2293     }
2294   }
2295 
2296   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2297   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2298   ierr = PetscFree(idxb);CHKERRQ(ierr);
2299   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2304 {
2305   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2306   PetscInt       n      = A->rmap->n;
2307   PetscInt       cstart = A->cmap->rstart;
2308   PetscInt       *cmap  = mat->garray;
2309   PetscInt       *diagIdx, *offdiagIdx;
2310   Vec            diagV, offdiagV;
2311   PetscScalar    *a, *diagA, *offdiagA;
2312   PetscInt       r;
2313   PetscErrorCode ierr;
2314 
2315   PetscFunctionBegin;
2316   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2317   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2318   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2319   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2320   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2321   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2322   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2323   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2324   for (r = 0; r < n; ++r) {
2325     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2326       a[r]   = diagA[r];
2327       idx[r] = cstart + diagIdx[r];
2328     } else {
2329       a[r]   = offdiagA[r];
2330       idx[r] = cmap[offdiagIdx[r]];
2331     }
2332   }
2333   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2334   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2335   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2336   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2337   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2338   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2343 {
2344   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2345   PetscInt       n      = A->rmap->n;
2346   PetscInt       cstart = A->cmap->rstart;
2347   PetscInt       *cmap  = mat->garray;
2348   PetscInt       *diagIdx, *offdiagIdx;
2349   Vec            diagV, offdiagV;
2350   PetscScalar    *a, *diagA, *offdiagA;
2351   PetscInt       r;
2352   PetscErrorCode ierr;
2353 
2354   PetscFunctionBegin;
2355   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2356   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2357   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2358   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2359   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2360   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2361   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2362   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2363   for (r = 0; r < n; ++r) {
2364     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2365       a[r]   = diagA[r];
2366       idx[r] = cstart + diagIdx[r];
2367     } else {
2368       a[r]   = offdiagA[r];
2369       idx[r] = cmap[offdiagIdx[r]];
2370     }
2371   }
2372   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2373   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2374   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2375   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2376   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2377   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2382 {
2383   PetscErrorCode ierr;
2384   Mat            *dummy;
2385 
2386   PetscFunctionBegin;
2387   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2388   *newmat = *dummy;
2389   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2394 {
2395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2396   PetscErrorCode ierr;
2397 
2398   PetscFunctionBegin;
2399   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2400   A->factorerrortype = a->A->factorerrortype;
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2405 {
2406   PetscErrorCode ierr;
2407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2408 
2409   PetscFunctionBegin;
2410   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2411   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2412   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2413   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2418 {
2419   PetscFunctionBegin;
2420   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2421   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 /*@
2426    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2427 
2428    Collective on Mat
2429 
2430    Input Parameters:
2431 +    A - the matrix
2432 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2433 
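   Options Database Keys:
.  -mat_increase_overlap_scalable - use a scalable algorithm to compute the overlap during a call to MatSetFromOptions()

   Example Usage (an illustrative sketch; the sizes m, n, M, N and the preallocation arrays
   d_nnz, o_nnz are placeholders, not values defined here):
.vb
      Mat A;
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,M,N,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve
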
2434    Level: advanced
2435 
2436 @*/
2437 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2438 {
2439   PetscErrorCode       ierr;
2440 
2441   PetscFunctionBegin;
2442   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2447 {
2448   PetscErrorCode       ierr;
2449   PetscBool            sc = PETSC_FALSE,flg;
2450 
2451   PetscFunctionBegin;
2452   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2453   ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr);
2454   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2455   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2456   if (flg) {
2457     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2458   }
2459   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2464 {
2465   PetscErrorCode ierr;
2466   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2467   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2468 
2469   PetscFunctionBegin;
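  /* make sure the diagonal block has room for the diagonal entries (preserving its nonew
     setting) before MatShift_Basic() inserts the shift */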
2470   if (!Y->preallocated) {
2471     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2472   } else if (!aij->nz) {
2473     PetscInt nonew = aij->nonew;
2474     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2475     aij->nonew = nonew;
2476   }
2477   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2482 {
2483   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2484   PetscErrorCode ierr;
2485 
2486   PetscFunctionBegin;
2487   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2488   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2489   if (d) {
2490     PetscInt rstart;
2491     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2492     *d += rstart;
2493 
2494   }
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 
2499 /* -------------------------------------------------------------------*/
2500 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2501                                        MatGetRow_MPIAIJ,
2502                                        MatRestoreRow_MPIAIJ,
2503                                        MatMult_MPIAIJ,
2504                                 /* 4*/ MatMultAdd_MPIAIJ,
2505                                        MatMultTranspose_MPIAIJ,
2506                                        MatMultTransposeAdd_MPIAIJ,
2507                                        0,
2508                                        0,
2509                                        0,
2510                                 /*10*/ 0,
2511                                        0,
2512                                        0,
2513                                        MatSOR_MPIAIJ,
2514                                        MatTranspose_MPIAIJ,
2515                                 /*15*/ MatGetInfo_MPIAIJ,
2516                                        MatEqual_MPIAIJ,
2517                                        MatGetDiagonal_MPIAIJ,
2518                                        MatDiagonalScale_MPIAIJ,
2519                                        MatNorm_MPIAIJ,
2520                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2521                                        MatAssemblyEnd_MPIAIJ,
2522                                        MatSetOption_MPIAIJ,
2523                                        MatZeroEntries_MPIAIJ,
2524                                 /*24*/ MatZeroRows_MPIAIJ,
2525                                        0,
2526                                        0,
2527                                        0,
2528                                        0,
2529                                 /*29*/ MatSetUp_MPIAIJ,
2530                                        0,
2531                                        0,
2532                                        MatGetDiagonalBlock_MPIAIJ,
2533                                        0,
2534                                 /*34*/ MatDuplicate_MPIAIJ,
2535                                        0,
2536                                        0,
2537                                        0,
2538                                        0,
2539                                 /*39*/ MatAXPY_MPIAIJ,
2540                                        MatCreateSubMatrices_MPIAIJ,
2541                                        MatIncreaseOverlap_MPIAIJ,
2542                                        MatGetValues_MPIAIJ,
2543                                        MatCopy_MPIAIJ,
2544                                 /*44*/ MatGetRowMax_MPIAIJ,
2545                                        MatScale_MPIAIJ,
2546                                        MatShift_MPIAIJ,
2547                                        MatDiagonalSet_MPIAIJ,
2548                                        MatZeroRowsColumns_MPIAIJ,
2549                                 /*49*/ MatSetRandom_MPIAIJ,
2550                                        0,
2551                                        0,
2552                                        0,
2553                                        0,
2554                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2555                                        0,
2556                                        MatSetUnfactored_MPIAIJ,
2557                                        MatPermute_MPIAIJ,
2558                                        0,
2559                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2560                                        MatDestroy_MPIAIJ,
2561                                        MatView_MPIAIJ,
2562                                        0,
2563                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2564                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2565                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2566                                        0,
2567                                        0,
2568                                        0,
2569                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2570                                        MatGetRowMinAbs_MPIAIJ,
2571                                        0,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                 /*75*/ MatFDColoringApply_AIJ,
2576                                        MatSetFromOptions_MPIAIJ,
2577                                        0,
2578                                        0,
2579                                        MatFindZeroDiagonals_MPIAIJ,
2580                                 /*80*/ 0,
2581                                        0,
2582                                        0,
2583                                 /*83*/ MatLoad_MPIAIJ,
2584                                        MatIsSymmetric_MPIAIJ,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2590                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2591                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2592                                        MatPtAP_MPIAIJ_MPIAIJ,
2593                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2594                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*99*/ 0,
2600                                        0,
2601                                        0,
2602                                        MatConjugate_MPIAIJ,
2603                                        0,
2604                                 /*104*/MatSetValuesRow_MPIAIJ,
2605                                        MatRealPart_MPIAIJ,
2606                                        MatImaginaryPart_MPIAIJ,
2607                                        0,
2608                                        0,
2609                                 /*109*/0,
2610                                        0,
2611                                        MatGetRowMin_MPIAIJ,
2612                                        0,
2613                                        MatMissingDiagonal_MPIAIJ,
2614                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2615                                        0,
2616                                        MatGetGhosts_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                 /*119*/0,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                        MatGetMultiProcBlock_MPIAIJ,
2624                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2625                                        MatGetColumnNorms_MPIAIJ,
2626                                        MatInvertBlockDiagonal_MPIAIJ,
2627                                        0,
2628                                        MatCreateSubMatricesMPI_MPIAIJ,
2629                                 /*129*/0,
2630                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2631                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2632                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2633                                        0,
2634                                 /*134*/0,
2635                                        0,
2636                                        MatRARt_MPIAIJ_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                 /*139*/MatSetBlockSizes_MPIAIJ,
2640                                        0,
2641                                        0,
2642                                        MatFDColoringSetUp_MPIXAIJ,
2643                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2644                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2645 };
2646 
2647 /* ----------------------------------------------------------------------------------------*/
2648 
2649 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2650 {
2651   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2652   PetscErrorCode ierr;
2653 
2654   PetscFunctionBegin;
2655   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2656   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2661 {
2662   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2663   PetscErrorCode ierr;
2664 
2665   PetscFunctionBegin;
2666   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2667   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2672 {
2673   Mat_MPIAIJ     *b;
2674   PetscErrorCode ierr;
2675 
2676   PetscFunctionBegin;
2677   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2678   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2679   b = (Mat_MPIAIJ*)B->data;
2680 
2681 #if defined(PETSC_USE_CTABLE)
2682   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2683 #else
2684   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2685 #endif
2686   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2687   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2688   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2689 
2690   /* Because the B will have been resized we simply destroy it and create a new one each time */
2691   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2692   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2693   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2694   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2695   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2696   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2697 
2698   if (!B->preallocated) {
2699     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2700     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2701     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2702     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2703     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2704   }
2705 
2706   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2707   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2708   B->preallocated  = PETSC_TRUE;
2709   B->was_assembled = PETSC_FALSE;
2710   B->assembled     = PETSC_FALSE;
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2715 {
2716   Mat_MPIAIJ     *b;
2717   PetscErrorCode ierr;
2718 
2719   PetscFunctionBegin;
2720   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2721   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2722   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2723   b = (Mat_MPIAIJ*)B->data;
2724 
2725 #if defined(PETSC_USE_CTABLE)
2726   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2727 #else
2728   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2729 #endif
2730   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2731   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2732   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2733 
2734   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2735   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2736   B->preallocated  = PETSC_TRUE;
2737   B->was_assembled = PETSC_FALSE;
2738   B->assembled = PETSC_FALSE;
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2743 {
2744   Mat            mat;
2745   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2746   PetscErrorCode ierr;
2747 
2748   PetscFunctionBegin;
2749   *newmat = 0;
2750   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2751   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2752   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2753   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2754   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2755   a       = (Mat_MPIAIJ*)mat->data;
2756 
2757   mat->factortype   = matin->factortype;
2758   mat->assembled    = PETSC_TRUE;
2759   mat->insertmode   = NOT_SET_VALUES;
2760   mat->preallocated = PETSC_TRUE;
2761 
2762   a->size         = oldmat->size;
2763   a->rank         = oldmat->rank;
2764   a->donotstash   = oldmat->donotstash;
2765   a->roworiented  = oldmat->roworiented;
2766   a->rowindices   = 0;
2767   a->rowvalues    = 0;
2768   a->getrowactive = PETSC_FALSE;
2769 
2770   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2771   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2772 
2773   if (oldmat->colmap) {
2774 #if defined(PETSC_USE_CTABLE)
2775     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2776 #else
2777     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2778     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2779     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2780 #endif
2781   } else a->colmap = 0;
2782   if (oldmat->garray) {
2783     PetscInt len;
2784     len  = oldmat->B->cmap->n;
2785     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2786     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2787     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2788   } else a->garray = 0;
2789 
2790   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2791   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2792   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2793   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2794 
2795   if (oldmat->Mvctx_mpi1) {
2796     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2797     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2798   }
2799 
2800   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2801   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2802   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2803   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2804   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2805   *newmat = mat;
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2810 {
2811   PetscScalar    *vals,*svals;
2812   MPI_Comm       comm;
2813   PetscErrorCode ierr;
2814   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2815   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2816   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2817   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2818   PetscInt       cend,cstart,n,*rowners;
2819   int            fd;
2820   PetscInt       bs = newMat->rmap->bs;
2821 
2822   PetscFunctionBegin;
2823   /* force binary viewer to load .info file if it has not yet done so */
2824   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2825   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2826   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2827   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2828   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2829   if (!rank) {
2830     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2831     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2831     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2832     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2833   }
2834 
2835   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2836   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2837   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2838   if (bs < 0) bs = 1;
2839 
2840   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2841   M    = header[1]; N = header[2];
2842 
2843   /* If global sizes are set, check if they are consistent with that given in the file */
2844   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2845   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2846 
2847   /* determine ownership of all (block) rows */
2848   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2849   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2850   else m = newMat->rmap->n; /* Set by user */
2851 
2852   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2853   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2854 
2855   /* First process needs enough room for process with most rows */
2856   if (!rank) {
2857     mmax = rowners[1];
2858     for (i=2; i<=size; i++) {
2859       mmax = PetscMax(mmax, rowners[i]);
2860     }
2861   } else mmax = -1;             /* unused, but compilers complain */
2862 
2863   rowners[0] = 0;
2864   for (i=2; i<=size; i++) {
2865     rowners[i] += rowners[i-1];
2866   }
2867   rstart = rowners[rank];
2868   rend   = rowners[rank+1];
2869 
2870   /* distribute row lengths to all processors */
2871   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2872   if (!rank) {
2873     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2874     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2875     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2876     for (j=0; j<m; j++) {
2877       procsnz[0] += ourlens[j];
2878     }
2879     for (i=1; i<size; i++) {
2880       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2881       /* calculate the number of nonzeros on each processor */
2882       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2883         procsnz[i] += rowlengths[j];
2884       }
2885       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2886     }
2887     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2888   } else {
2889     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2890   }
2891 
2892   if (!rank) {
2893     /* determine max buffer needed and allocate it */
2894     maxnz = 0;
2895     for (i=0; i<size; i++) {
2896       maxnz = PetscMax(maxnz,procsnz[i]);
2897     }
2898     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2899 
2900     /* read in my part of the matrix column indices  */
2901     nz   = procsnz[0];
2902     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2903     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2904 
2905     /* read in everyone else's and ship off */
2906     for (i=1; i<size; i++) {
2907       nz   = procsnz[i];
2908       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2909       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2910     }
2911     ierr = PetscFree(cols);CHKERRQ(ierr);
2912   } else {
2913     /* determine buffer space needed for message */
2914     nz = 0;
2915     for (i=0; i<m; i++) {
2916       nz += ourlens[i];
2917     }
2918     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2919 
2920     /* receive message of column indices */
2921     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2922   }
2923 
2924   /* determine column ownership if matrix is not square */
2925   if (N != M) {
2926     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2927     else n = newMat->cmap->n;
2928     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2929     cstart = cend - n;
2930   } else {
2931     cstart = rstart;
2932     cend   = rend;
2933     n      = cend - cstart;
2934   }
2935 
2936   /* loop over local rows, determining number of off diagonal entries */
2937   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2938   jj   = 0;
2939   for (i=0; i<m; i++) {
2940     for (j=0; j<ourlens[i]; j++) {
2941       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2942       jj++;
2943     }
2944   }
2945 
2946   for (i=0; i<m; i++) {
2947     ourlens[i] -= offlens[i];
2948   }
2949   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2950 
2951   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2952 
2953   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2954 
2955   for (i=0; i<m; i++) {
2956     ourlens[i] += offlens[i];
2957   }
2958 
2959   if (!rank) {
2960     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2961 
2962     /* read in my part of the matrix numerical values  */
2963     nz   = procsnz[0];
2964     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2965 
2966     /* insert into matrix */
2967     jj      = rstart;
2968     smycols = mycols;
2969     svals   = vals;
2970     for (i=0; i<m; i++) {
2971       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2972       smycols += ourlens[i];
2973       svals   += ourlens[i];
2974       jj++;
2975     }
2976 
2977     /* read in other processors and ship out */
2978     for (i=1; i<size; i++) {
2979       nz   = procsnz[i];
2980       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2981       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2982     }
2983     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2984   } else {
2985     /* receive numeric values */
2986     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2987 
2988     /* receive message of values */
2989     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2990 
2991     /* insert into matrix */
2992     jj      = rstart;
2993     smycols = mycols;
2994     svals   = vals;
2995     for (i=0; i<m; i++) {
2996       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2997       smycols += ourlens[i];
2998       svals   += ourlens[i];
2999       jj++;
3000     }
3001   }
3002   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3003   ierr = PetscFree(vals);CHKERRQ(ierr);
3004   ierr = PetscFree(mycols);CHKERRQ(ierr);
3005   ierr = PetscFree(rowners);CHKERRQ(ierr);
3006   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3007   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3008   PetscFunctionReturn(0);
3009 }
3010 
3011 /* Not scalable because of ISAllGather() unless getting all columns. */
3012 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3013 {
3014   PetscErrorCode ierr;
3015   IS             iscol_local;
3016   PetscBool      isstride;
3017   PetscMPIInt    lisstride=0,gisstride;
3018 
3019   PetscFunctionBegin;
3020   /* check if we are grabbing all columns */
3021   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3022 
3023   if (isstride) {
3024     PetscInt  start,len,mstart,mlen;
3025     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3026     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3027     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3028     if (mstart == start && mlen-mstart == len) lisstride = 1;
3029   }
3030 
3031   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3032   if (gisstride) {
3033     PetscInt N;
3034     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3035     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3036     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3037     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3038   } else {
3039     PetscInt cbs;
3040     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3041     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3042     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3043   }
3044 
3045   *isseq = iscol_local;
3046   PetscFunctionReturn(0);
3047 }
3048 
3049 /*
3050  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3051  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3052 
3053  Input Parameters:
3054    mat - matrix
3055    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3056            i.e., mat->rstart <= isrow[i] < mat->rend
3057    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3058            i.e., mat->cstart <= iscol[i] < mat->cend
3059  Output Parameters:
3060    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3061    iscol_o - sequential column index set for retrieving mat->B
3062    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3063  */
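/*
 For illustration only (a hypothetical layout): suppose this process owns columns 4..7 of mat
 (cstart=4, cend=8) and its piece of iscol selects global columns {4,7}, while the other processes'
 pieces select {2} and {9}, so the concatenated iscol is {2,4,7,9}.  Then iscol_d = {0,3} (the local
 positions of columns 4 and 7); if mat->B holds off-process columns {1,2,9,11}, then iscol_o = {1,2}
 (the columns of mat->B corresponding to global columns 2 and 9) and garray = {0,3}, their positions
 within iscol.
*/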
3064 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3065 {
3066   PetscErrorCode ierr;
3067   Vec            x,cmap;
3068   const PetscInt *is_idx;
3069   PetscScalar    *xarray,*cmaparray;
3070   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3071   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3072   Mat            B=a->B;
3073   Vec            lvec=a->lvec,lcmap;
3074   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3075   MPI_Comm       comm;
3076   VecScatter     Mvctx=a->Mvctx;
3077 
3078   PetscFunctionBegin;
3079   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3080   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3081 
3082   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3083   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3084   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3085   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3086   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3087 
3088   /* Get start indices */
3089   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3090   isstart -= ncols;
3091   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3092 
3093   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3094   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3095   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3096   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3097   for (i=0; i<ncols; i++) {
3098     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3099     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3100     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3101   }
3102   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3103   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3104   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3105 
3106   /* Get iscol_d */
3107   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3108   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3109   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3110 
3111   /* Get isrow_d */
3112   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3113   rstart = mat->rmap->rstart;
3114   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3115   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3116   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3117   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3118 
3119   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3120   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3121   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3122 
3123   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3124   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3125   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3126 
3127   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3128 
3129   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3130   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3131 
3132   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3133   /* off-process column indices */
3134   count = 0;
3135   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3136   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3137 
3138   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3139   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3140   for (i=0; i<Bn; i++) {
3141     if (PetscRealPart(xarray[i]) > -1.0) {
3142       idx[count]     = i;                   /* local column index in off-diagonal part B */
3143       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3144       count++;
3145     }
3146   }
3147   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3148   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3149 
3150   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3151   /* cannot ensure iscol_o has same blocksize as iscol! */
3152 
3153   ierr = PetscFree(idx);CHKERRQ(ierr);
3154   *garray = cmap1;
3155 
3156   ierr = VecDestroy(&x);CHKERRQ(ierr);
3157   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3158   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3163 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3164 {
3165   PetscErrorCode ierr;
3166   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3167   Mat            M = NULL;
3168   MPI_Comm       comm;
3169   IS             iscol_d,isrow_d,iscol_o;
3170   Mat            Asub = NULL,Bsub = NULL;
3171   PetscInt       n;
3172 
3173   PetscFunctionBegin;
3174   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3175 
3176   if (call == MAT_REUSE_MATRIX) {
3177     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3178     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3179     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3180 
3181     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3182     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3183 
3184     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3185     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3186 
3187     /* Update diagonal and off-diagonal portions of submat */
3188     asub = (Mat_MPIAIJ*)(*submat)->data;
3189     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3190     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3191     if (n) {
3192       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3193     }
3194     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3195     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3196 
3197   } else { /* call == MAT_INITIAL_MATRIX */
3198     const PetscInt *garray;
3199     PetscInt        BsubN;
3200 
3201     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3202     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3203 
3204     /* Create local submatrices Asub and Bsub */
3205     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3206     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3207 
3208     /* Create submatrix M */
3209     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3210 
3211     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3212     asub = (Mat_MPIAIJ*)M->data;
3213 
3214     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3215     n = asub->B->cmap->N;
3216     if (BsubN > n) {
3217       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3218       const PetscInt *idx;
3219       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3220       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3221 
3222       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3223       j = 0;
3224       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3225       for (i=0; i<n; i++) {
3226         if (j >= BsubN) break;
3227         while (subgarray[i] > garray[j]) j++;
3228 
3229         if (subgarray[i] == garray[j]) {
3230           idx_new[i] = idx[j++];
3231         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3232       }
3233       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3234 
3235       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3236       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3237 
3238     } else if (BsubN < n) {
3239       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N);
3240     }
3241 
3242     ierr = PetscFree(garray);CHKERRQ(ierr);
3243     *submat = M;
3244 
3245     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3246     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3247     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3248 
3249     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3250     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3251 
3252     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3253     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3254   }
3255   PetscFunctionReturn(0);
3256 }
3257 
3258 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3259 {
3260   PetscErrorCode ierr;
3261   IS             iscol_local=NULL,isrow_d;
3262   PetscInt       csize;
3263   PetscInt       n,i,j,start,end;
3264   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3265   MPI_Comm       comm;
3266 
3267   PetscFunctionBegin;
3268   /* If isrow has same processor distribution as mat,
3269      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3270   if (call == MAT_REUSE_MATRIX) {
3271     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3272     if (isrow_d) {
3273       sameRowDist  = PETSC_TRUE;
3274       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3275     } else {
3276       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3277       if (iscol_local) {
3278         sameRowDist  = PETSC_TRUE;
3279         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3280       }
3281     }
3282   } else {
3283     /* Check if isrow has same processor distribution as mat */
3284     sameDist[0] = PETSC_FALSE;
3285     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3286     if (!n) {
3287       sameDist[0] = PETSC_TRUE;
3288     } else {
3289       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3290       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3291       if (i >= start && j < end) {
3292         sameDist[0] = PETSC_TRUE;
3293       }
3294     }
3295 
3296     /* Check if iscol has same processor distribution as mat */
3297     sameDist[1] = PETSC_FALSE;
3298     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3299     if (!n) {
3300       sameDist[1] = PETSC_TRUE;
3301     } else {
3302       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3303       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3304       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3305     }
3306 
3307     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3308     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3309     sameRowDist = tsameDist[0];
3310   }
3311 
3312   if (sameRowDist) {
3313     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3314       /* isrow and iscol have same processor distribution as mat */
3315       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3316       PetscFunctionReturn(0);
3317     } else { /* sameRowDist */
3318       /* isrow has same processor distribution as mat */
3319       if (call == MAT_INITIAL_MATRIX) {
3320         PetscBool sorted;
3321         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3322         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3323         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3324         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3325 
3326         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3327         if (sorted) {
3328           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3329           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3330           PetscFunctionReturn(0);
3331         }
3332       } else { /* call == MAT_REUSE_MATRIX */
3333         IS    iscol_sub;
3334         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3335         if (iscol_sub) {
3336           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3337           PetscFunctionReturn(0);
3338         }
3339       }
3340     }
3341   }
3342 
3343   /* General case: iscol -> iscol_local which has global size of iscol */
3344   if (call == MAT_REUSE_MATRIX) {
3345     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3346     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3347   } else {
3348     if (!iscol_local) {
3349       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3350     }
3351   }
3352 
3353   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3354   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3355 
3356   if (call == MAT_INITIAL_MATRIX) {
3357     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3358     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3359   }
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 /*@C
3364      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3365          and "off-diagonal" parts of the matrix in CSR format.
3366 
3367    Collective on MPI_Comm
3368 
3369    Input Parameters:
3370 +  comm - MPI communicator
3371 .  A - "diagonal" portion of matrix
3372 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3373 -  garray - global index of B columns
3374 
3375    Output Parameter:
3376 .   mat - the matrix, with input A as its local diagonal matrix
3377    Level: advanced
3378 
3379    Notes:
3380        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3381        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3382 
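   Example usage (a sketch only; Adiag, Boff, and garray are illustrative names assumed to have been
   created by the caller, with Boff using the compact local column numbering described by garray):
.vb
     Mat      Adiag,Boff,C;
     PetscInt *garray;     /* garray[k] is the global column of local column k of Boff */
     ...                   /* assemble the sequential blocks Adiag and Boff and build garray */
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adiag,Boff,garray,&C);CHKERRQ(ierr);
     ...                   /* Adiag and Boff now belong to C; do not use or destroy them afterwards */
.ve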
3383 .seealso: MatCreateMPIAIJWithSplitArrays()
3384 @*/
3385 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3386 {
3387   PetscErrorCode ierr;
3388   Mat_MPIAIJ     *maij;
3389   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3390   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3391   PetscScalar    *oa=b->a;
3392   Mat            Bnew;
3393   PetscInt       m,n,N;
3394 
3395   PetscFunctionBegin;
3396   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3397   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3398   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3399   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3400   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3401   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3402 
3403   /* Get global columns of mat */
3404   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3405 
3406   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3407   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3408   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3409   maij = (Mat_MPIAIJ*)(*mat)->data;
3410 
3411   (*mat)->preallocated = PETSC_TRUE;
3412 
3413   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3414   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3415 
3416   /* Set A as diagonal portion of *mat */
3417   maij->A = A;
3418 
3419   nz = oi[m];
3420   for (i=0; i<nz; i++) {
3421     col   = oj[i];
3422     oj[i] = garray[col];
3423   }
3424 
3425    /* Set Bnew as off-diagonal portion of *mat */
3426   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3427   bnew        = (Mat_SeqAIJ*)Bnew->data;
3428   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3429   maij->B     = Bnew;
3430 
3431   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3432 
3433   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3434   b->free_a       = PETSC_FALSE;
3435   b->free_ij      = PETSC_FALSE;
3436   ierr = MatDestroy(&B);CHKERRQ(ierr);
3437 
3438   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3439   bnew->free_a       = PETSC_TRUE;
3440   bnew->free_ij      = PETSC_TRUE;
3441 
3442   /* condense columns of maij->B */
3443   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3444   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3445   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3446   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3447   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3448   PetscFunctionReturn(0);
3449 }
3450 
3451 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3452 
3453 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3454 {
3455   PetscErrorCode ierr;
3456   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3457   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3458   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3459   Mat            M,Msub,B=a->B;
3460   MatScalar      *aa;
3461   Mat_SeqAIJ     *aij;
3462   PetscInt       *garray = a->garray,*colsub,Ncols;
3463   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3464   IS             iscol_sub,iscmap;
3465   const PetscInt *is_idx,*cmap;
3466   PetscBool      allcolumns=PETSC_FALSE;
3467   MPI_Comm       comm;
3468 
3469   PetscFunctionBegin;
3470   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3471 
3472   if (call == MAT_REUSE_MATRIX) {
3473     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3474     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3475     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3476 
3477     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3478     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3479 
3480     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3481     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3482 
3483     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3484 
3485   } else { /* call == MAT_INITIAL_MATRIX */
3486     PetscBool flg;
3487 
3488     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3489     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3490 
3491     /* (1) iscol -> nonscalable iscol_local */
3492     /* Check for special case: each processor gets entire matrix columns */
3493     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3494     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3495     if (allcolumns) {
3496       iscol_sub = iscol_local;
3497       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3498       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3499 
3500     } else {
3501       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it can have duplicate indices */
3502       PetscInt *idx,*cmap1,k;
3503       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3504       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3505       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3506       count = 0;
3507       k     = 0;
3508       for (i=0; i<Ncols; i++) {
3509         j = is_idx[i];
3510         if (j >= cstart && j < cend) {
3511           /* diagonal part of mat */
3512           idx[count]     = j;
3513           cmap1[count++] = i; /* column index in submat */
3514         } else if (Bn) {
3515           /* off-diagonal part of mat */
3516           if (j == garray[k]) {
3517             idx[count]     = j;
3518             cmap1[count++] = i;  /* column index in submat */
3519           } else if (j > garray[k]) {
3520             while (j > garray[k] && k < Bn-1) k++;
3521             if (j == garray[k]) {
3522               idx[count]     = j;
3523               cmap1[count++] = i; /* column index in submat */
3524             }
3525           }
3526         }
3527       }
3528       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3529 
3530       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3531       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3532       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3533 
3534       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3535     }
3536 
3537     /* (3) Create sequential Msub */
3538     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3539   }
3540 
3541   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3542   aij  = (Mat_SeqAIJ*)(Msub)->data;
3543   ii   = aij->i;
3544   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3545 
3546   /*
3547       m - number of local rows
3548       Ncols - number of columns (same on all processors)
3549       rstart - first row in new global matrix generated
3550   */
3551   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3552 
3553   if (call == MAT_INITIAL_MATRIX) {
3554     /* (4) Create parallel newmat */
3555     PetscMPIInt    rank,size;
3556     PetscInt       csize;
3557 
3558     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3559     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3560 
3561     /*
3562         Determine the number of non-zeros in the diagonal and off-diagonal
3563         portions of the matrix in order to do correct preallocation
3564     */
3565 
3566     /* first get start and end of "diagonal" columns */
3567     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3568     if (csize == PETSC_DECIDE) {
3569       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3570       if (mglobal == Ncols) { /* square matrix */
3571         nlocal = m;
3572       } else {
3573         nlocal = Ncols/size + ((Ncols % size) > rank);
3574       }
3575     } else {
3576       nlocal = csize;
3577     }
3578     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3579     rstart = rend - nlocal;
3580     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3581 
3582     /* next, compute all the lengths */
3583     jj    = aij->j;
3584     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3585     olens = dlens + m;
3586     for (i=0; i<m; i++) {
3587       jend = ii[i+1] - ii[i];
3588       olen = 0;
3589       dlen = 0;
3590       for (j=0; j<jend; j++) {
3591         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3592         else dlen++;
3593         jj++;
3594       }
3595       olens[i] = olen;
3596       dlens[i] = dlen;
3597     }
3598 
3599     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3600     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3601 
3602     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3603     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3604     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3605     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3606     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3607     ierr = PetscFree(dlens);CHKERRQ(ierr);
3608 
3609   } else { /* call == MAT_REUSE_MATRIX */
3610     M    = *newmat;
3611     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3612     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3613     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3614     /*
3615          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3616        rather than the slower MatSetValues().
3617     */
3618     M->was_assembled = PETSC_TRUE;
3619     M->assembled     = PETSC_FALSE;
3620   }
3621 
3622   /* (5) Set values of Msub to *newmat */
3623   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3624   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3625 
3626   jj   = aij->j;
3627   aa   = aij->a;
3628   for (i=0; i<m; i++) {
3629     row = rstart + i;
3630     nz  = ii[i+1] - ii[i];
3631     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3632     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3633     jj += nz; aa += nz;
3634   }
3635   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3636 
3637   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3639 
3640   ierr = PetscFree(colsub);CHKERRQ(ierr);
3641 
3642   /* save Msub, iscol_sub and iscmap used in processor for next request */
3643   if (call ==  MAT_INITIAL_MATRIX) {
3644     *newmat = M;
3645     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3646     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3647 
3648     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3649     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3650 
3651     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3652     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3653 
3654     if (iscol_local) {
3655       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3656       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3657     }
3658   }
3659   PetscFunctionReturn(0);
3660 }
3661 
3662 /*
3663     Not great since it makes two copies of the submatrix: first a SeqAIJ
3664   on each process, and then the end result by concatenating the local matrices.
3665   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3666 
3667   Note: This requires a sequential iscol with all indices.
3668 */
3669 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3670 {
3671   PetscErrorCode ierr;
3672   PetscMPIInt    rank,size;
3673   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3674   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3675   Mat            M,Mreuse;
3676   MatScalar      *aa,*vwork;
3677   MPI_Comm       comm;
3678   Mat_SeqAIJ     *aij;
3679   PetscBool      colflag,allcolumns=PETSC_FALSE;
3680 
3681   PetscFunctionBegin;
3682   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3683   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3684   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3685 
3686   /* Check for special case: each processor gets entire matrix columns */
3687   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3688   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3689   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3690 
3691   if (call ==  MAT_REUSE_MATRIX) {
3692     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3693     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3694     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3695   } else {
3696     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3697   }
3698 
3699   /*
3700       m - number of local rows
3701       n - number of columns (same on all processors)
3702       rstart - first row in new global matrix generated
3703   */
3704   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3705   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3706   if (call == MAT_INITIAL_MATRIX) {
3707     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3708     ii  = aij->i;
3709     jj  = aij->j;
3710 
3711     /*
3712         Determine the number of non-zeros in the diagonal and off-diagonal
3713         portions of the matrix in order to do correct preallocation
3714     */
3715 
3716     /* first get start and end of "diagonal" columns */
3717     if (csize == PETSC_DECIDE) {
3718       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3719       if (mglobal == n) { /* square matrix */
3720         nlocal = m;
3721       } else {
3722         nlocal = n/size + ((n % size) > rank);
3723       }
3724     } else {
3725       nlocal = csize;
3726     }
3727     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3728     rstart = rend - nlocal;
3729     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3730 
3731     /* next, compute all the lengths */
3732     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3733     olens = dlens + m;
3734     for (i=0; i<m; i++) {
3735       jend = ii[i+1] - ii[i];
3736       olen = 0;
3737       dlen = 0;
3738       for (j=0; j<jend; j++) {
3739         if (*jj < rstart || *jj >= rend) olen++;
3740         else dlen++;
3741         jj++;
3742       }
3743       olens[i] = olen;
3744       dlens[i] = dlen;
3745     }
3746     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3747     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3748     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3749     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3750     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3751     ierr = PetscFree(dlens);CHKERRQ(ierr);
3752   } else {
3753     PetscInt ml,nl;
3754 
3755     M    = *newmat;
3756     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3757     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3758     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3759     /*
3760          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3761        rather than the slower MatSetValues().
3762     */
3763     M->was_assembled = PETSC_TRUE;
3764     M->assembled     = PETSC_FALSE;
3765   }
3766   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3767   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3768   ii   = aij->i;
3769   jj   = aij->j;
3770   aa   = aij->a;
3771   for (i=0; i<m; i++) {
3772     row   = rstart + i;
3773     nz    = ii[i+1] - ii[i];
3774     cwork = jj;     jj += nz;
3775     vwork = aa;     aa += nz;
3776     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3777   }
3778 
3779   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3780   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3781   *newmat = M;
3782 
3783   /* save submatrix used in processor for next request */
3784   if (call ==  MAT_INITIAL_MATRIX) {
3785     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3786     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3787   }
3788   PetscFunctionReturn(0);
3789 }
3790 
3791 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3792 {
3793   PetscInt       m,cstart, cend,j,nnz,i,d;
3794   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3795   const PetscInt *JJ;
3796   PetscScalar    *values;
3797   PetscErrorCode ierr;
3798   PetscBool      nooffprocentries;
3799 
3800   PetscFunctionBegin;
3801   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3802 
3803   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3804   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3805   m      = B->rmap->n;
3806   cstart = B->cmap->rstart;
3807   cend   = B->cmap->rend;
3808   rstart = B->rmap->rstart;
3809 
3810   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3811 
3812 #if defined(PETSC_USE_DEBUG)
3813   for (i=0; i<m; i++) {
3814     nnz = Ii[i+1]- Ii[i];
3815     JJ  = J + Ii[i];
3816     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3817     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3818     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3819   }
3820 #endif
3821 
3822   for (i=0; i<m; i++) {
3823     nnz     = Ii[i+1]- Ii[i];
3824     JJ      = J + Ii[i];
3825     nnz_max = PetscMax(nnz_max,nnz);
3826     d       = 0;
3827     for (j=0; j<nnz; j++) {
3828       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3829     }
3830     d_nnz[i] = d;
3831     o_nnz[i] = nnz - d;
3832   }
3833   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3834   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3835 
3836   if (v) values = (PetscScalar*)v;
3837   else {
3838     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3839   }
3840 
3841   for (i=0; i<m; i++) {
3842     ii   = i + rstart;
3843     nnz  = Ii[i+1]- Ii[i];
3844     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3845   }
3846   nooffprocentries    = B->nooffprocentries;
3847   B->nooffprocentries = PETSC_TRUE;
3848   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3849   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3850   B->nooffprocentries = nooffprocentries;
3851 
3852   if (!v) {
3853     ierr = PetscFree(values);CHKERRQ(ierr);
3854   }
3855   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3856   PetscFunctionReturn(0);
3857 }
3858 
3859 /*@
3860    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3861    (the default parallel PETSc format).
3862 
3863    Collective on MPI_Comm
3864 
3865    Input Parameters:
3866 +  B - the matrix
3867 .  i - the indices into j for the start of each local row (starts with zero)
3868 .  j - the column indices for each local row (starts with zero)
3869 -  v - optional values in the matrix
3870 
3871    Level: developer
3872 
3873    Notes:
3874        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3875      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3876      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3877 
3878        The i and j indices are 0 based; the i indices are offsets into the local j array.
3879 
3880        The format used for the sparse matrix input is equivalent to a
3881     row-major ordering, i.e. for the following matrix, the input data expected is
3882     as shown below; a brief usage sketch follows the example.
3883 
3884 $        1 0 0
3885 $        2 0 3     P0
3886 $       -------
3887 $        4 5 6     P1
3888 $
3889 $     Process0 [P0]: rows_owned=[0,1]
3890 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3891 $        j =  {0,0,2}  [size = 3]
3892 $        v =  {1,2,3}  [size = 3]
3893 $
3894 $     Process1 [P1]: rows_owned=[2]
3895 $        i =  {0,3}    [size = nrow+1  = 1+1]
3896 $        j =  {0,1,2}  [size = 3]
3897 $        v =  {4,5,6}  [size = 3]
3898 
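       A minimal call sequence for process P0 in the example above might look like the
     following sketch (the arrays i, j, and v are the per-process CSR arrays shown above;
     the communicator and sizes are illustrative only):

$     MatCreate(comm,&B);
$     MatSetSizes(B,2,PETSC_DECIDE,3,3);       /* P0 owns 2 of the 3 rows */
$     MatSetType(B,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(B,i,j,v);
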
3899 .keywords: matrix, aij, compressed row, sparse, parallel
3900 
3901 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3902           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3903 @*/
3904 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3905 {
3906   PetscErrorCode ierr;
3907 
3908   PetscFunctionBegin;
3909   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3910   PetscFunctionReturn(0);
3911 }
3912 
3913 /*@C
3914    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3915    (the default parallel PETSc format).  For good matrix assembly performance
3916    the user should preallocate the matrix storage by setting the parameters
3917    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3918    performance can be increased by more than a factor of 50.
3919 
3920    Collective on MPI_Comm
3921 
3922    Input Parameters:
3923 +  B - the matrix
3924 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3925            (same value is used for all local rows)
3926 .  d_nnz - array containing the number of nonzeros in the various rows of the
3927            DIAGONAL portion of the local submatrix (possibly different for each row)
3928            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3929            The size of this array is equal to the number of local rows, i.e 'm'.
3930            For matrices that will be factored, you must leave room for (and set)
3931            the diagonal entry even if it is zero.
3932 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3933            submatrix (same value is used for all local rows).
3934 -  o_nnz - array containing the number of nonzeros in the various rows of the
3935            OFF-DIAGONAL portion of the local submatrix (possibly different for
3936            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3937            structure. The size of this array is equal to the number
3938            of local rows, i.e 'm'.
3939 
3940    If the *_nnz parameter is given then the *_nz parameter is ignored
3941 
3942    The AIJ format (also called the Yale sparse matrix format or
3943    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3944    storage.  The stored row and column indices begin with zero.
3945    See Users-Manual: ch_mat for details.
3946 
3947    The parallel matrix is partitioned such that the first m0 rows belong to
3948    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3949    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
3950 
3951    The DIAGONAL portion of the local submatrix of a processor can be defined
3952    as the submatrix which is obtained by extracting the part corresponding to
3953    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3954    first row that belongs to the processor, r2 is the last row belonging to
3955    this processor, and c1-c2 is the range of indices of the local part of a
3956    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3957    common case of a square matrix, the row and column ranges are the same and
3958    the DIAGONAL part is also square. The remaining portion of the local
3959    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3960 
3961    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3962 
3963    You can call MatGetInfo() to get information on how effective the preallocation was;
3964    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3965    You can also run with the option -info and look for messages with the string
3966    malloc in them to see if additional memory allocation was needed.
3967 
3968    Example usage:
3969 
3970    Consider the following 8x8 matrix with 34 non-zero values, that is
3971    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3972    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3973    as follows:
3974 
3975 .vb
3976             1  2  0  |  0  3  0  |  0  4
3977     Proc0   0  5  6  |  7  0  0  |  8  0
3978             9  0 10  | 11  0  0  | 12  0
3979     -------------------------------------
3980            13  0 14  | 15 16 17  |  0  0
3981     Proc1   0 18  0  | 19 20 21  |  0  0
3982             0  0  0  | 22 23  0  | 24  0
3983     -------------------------------------
3984     Proc2  25 26 27  |  0  0 28  | 29  0
3985            30  0  0  | 31 32 33  |  0 34
3986 .ve
3987 
3988    This can be represented as a collection of submatrices as:
3989 
3990 .vb
3991       A B C
3992       D E F
3993       G H I
3994 .ve
3995 
3996    Where the submatrices A,B,C are owned by proc0, D,E,F are
3997    owned by proc1, G,H,I are owned by proc2.
3998 
3999    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4000    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4001    The 'M','N' parameters are 8,8, and have the same values on all procs.
4002 
4003    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4004    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4005    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4006    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4007    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4008    matrix, and [DF] as another SeqAIJ matrix.
4009 
4010    When d_nz, o_nz parameters are specified, d_nz storage elements are
4011    allocated for every row of the local diagonal submatrix, and o_nz
4012    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4013    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4014    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4015    In this case, the values of d_nz,o_nz are:
4016 .vb
4017      proc0 : dnz = 2, o_nz = 2
4018      proc1 : dnz = 3, o_nz = 2
4019      proc2 : dnz = 1, o_nz = 4
4020 .ve
4021    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4022    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4023    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4024    34 values.
4025 
4026    When d_nnz, o_nnz parameters are specified, the storage is specified
4027    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4028    In the above case the values for d_nnz,o_nnz are:
4029 .vb
4030      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4031      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4032      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4033 .ve
4034    Here the space allocated is sum of all the above values i.e 34, and
4035    hence pre-allocation is perfect.
4036 
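   As a concrete sketch (using the d_nnz/o_nnz values of proc0 in the example above;
   the communicator and variable names are illustrative only), the preallocation call
   on proc0 could be

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
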
4037    Level: intermediate
4038 
4039 .keywords: matrix, aij, compressed row, sparse, parallel
4040 
4041 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4042           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4043 @*/
4044 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4045 {
4046   PetscErrorCode ierr;
4047 
4048   PetscFunctionBegin;
4049   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4050   PetscValidType(B,1);
4051   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4052   PetscFunctionReturn(0);
4053 }
4054 
4055 /*@
4056      MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the
4057          local rows in standard CSR format.
4058 
4059    Collective on MPI_Comm
4060 
4061    Input Parameters:
4062 +  comm - MPI communicator
4063 .  m - number of local rows (Cannot be PETSC_DECIDE)
4064 .  n - This value should be the same as the local size used in creating the
4065        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4066        calculated if N is given). For square matrices n is almost always m.
4067 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4068 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4069 .   i - row indices
4070 .   j - column indices
4071 -   a - matrix values
4072 
4073    Output Parameter:
4074 .   mat - the matrix
4075 
4076    Level: intermediate
4077 
4078    Notes:
4079        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4080      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4081      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4082 
4083        The i and j indices are 0 based; the i indices are offsets into the local j array.
4084 
4085        The format used for the sparse matrix input is equivalent to a
4086     row-major ordering, i.e. for the following matrix, the input data expected is
4087     as shown below; a brief usage sketch follows the example.
4088 
4089 $        1 0 0
4090 $        2 0 3     P0
4091 $       -------
4092 $        4 5 6     P1
4093 $
4094 $     Process0 [P0]: rows_owned=[0,1]
4095 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4096 $        j =  {0,0,2}  [size = 3]
4097 $        v =  {1,2,3}  [size = 3]
4098 $
4099 $     Process1 [P1]: rows_owned=[2]
4100 $        i =  {0,3}    [size = nrow+1  = 1+1]
4101 $        j =  {0,1,2}  [size = 3]
4102 $        v =  {4,5,6}  [size = 3]
4103 
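       For instance, process P0 above could provide its rows with the following sketch
     (the communicator and variable names are illustrative only):

$     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$     PetscScalar v[] = {1,2,3};
$     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
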
4104 .keywords: matrix, aij, compressed row, sparse, parallel
4105 
4106 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4107           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4108 @*/
4109 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4110 {
4111   PetscErrorCode ierr;
4112 
4113   PetscFunctionBegin;
4114   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4115   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4116   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4117   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4118   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4119   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4120   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4121   PetscFunctionReturn(0);
4122 }
4123 
4124 /*@C
4125    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4126    (the default parallel PETSc format).  For good matrix assembly performance
4127    the user should preallocate the matrix storage by setting the parameters
4128    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4129    performance can be increased by more than a factor of 50.
4130 
4131    Collective on MPI_Comm
4132 
4133    Input Parameters:
4134 +  comm - MPI communicator
4135 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4136            This value should be the same as the local size used in creating the
4137            y vector for the matrix-vector product y = Ax.
4138 .  n - This value should be the same as the local size used in creating the
4139        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4140        calculated if N is given). For square matrices n is almost always m.
4141 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4142 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4143 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4144            (same value is used for all local rows)
4145 .  d_nnz - array containing the number of nonzeros in the various rows of the
4146            DIAGONAL portion of the local submatrix (possibly different for each row)
4147            or NULL, if d_nz is used to specify the nonzero structure.
4148            The size of this array is equal to the number of local rows, i.e 'm'.
4149 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4150            submatrix (same value is used for all local rows).
4151 -  o_nnz - array containing the number of nonzeros in the various rows of the
4152            OFF-DIAGONAL portion of the local submatrix (possibly different for
4153            each row) or NULL, if o_nz is used to specify the nonzero
4154            structure. The size of this array is equal to the number
4155            of local rows, i.e 'm'.
4156 
4157    Output Parameter:
4158 .  A - the matrix
4159 
4160    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4161    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4162    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4163 
4164    Notes:
4165    If the *_nnz parameter is given then the *_nz parameter is ignored
4166 
4167    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4168    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4169    storage requirements for this matrix.
4170 
4171    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4172    processor then it must be used on all processors that share the object for
4173    that argument.
4174 
4175    The user MUST specify either the local or global matrix dimensions
4176    (possibly both).
4177 
4178    The parallel matrix is partitioned across processors such that the
4179    first m0 rows belong to process 0, the next m1 rows belong to
4180    process 1, the next m2 rows belong to process 2, etc., where
4181    m0,m1,m2,... are the values of the input parameter 'm' on each process, i.e. each processor
4182    stores values corresponding to an [m x N] submatrix.
4183 
4184    The columns are logically partitioned with the n0 columns belonging
4185    to 0th partition, the next n1 columns belonging to the next
4186    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4187 
4188    The DIAGONAL portion of the local submatrix on any given processor
4189    is the submatrix formed by the m rows and n columns owned by
4190    the given processor, i.e. the diagonal matrix on
4191    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4192    etc. The remaining portion of the local submatrix [m x (N-n)]
4193    constitute the OFF-DIAGONAL portion. The example below better
4194    illustrates this concept.
4195 
4196    For a square global matrix we define each processor's diagonal portion
4197    to be its local rows and the corresponding columns (a square submatrix);
4198    each processor's off-diagonal portion encompasses the remainder of the
4199    local matrix (a rectangular submatrix).
4200 
4201    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4202 
4203    When calling this routine with a single process communicator, a matrix of
4204    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4205    type of communicator, use the construction mechanism
4206 .vb
4207      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4208 .ve
4214 
4215    By default, this format uses inodes (identical nodes) when possible.
4216    We search for consecutive rows with the same nonzero structure, thereby
4217    reusing matrix information to achieve increased efficiency.
4218 
4219    Options Database Keys:
4220 +  -mat_no_inode  - Do not use inodes
4221 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4222 
4223 
4224 
4225    Example usage:
4226 
4227    Consider the following 8x8 matrix with 34 non-zero values, that is
4228    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4229    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4230    as follows
4231 
4232 .vb
4233             1  2  0  |  0  3  0  |  0  4
4234     Proc0   0  5  6  |  7  0  0  |  8  0
4235             9  0 10  | 11  0  0  | 12  0
4236     -------------------------------------
4237            13  0 14  | 15 16 17  |  0  0
4238     Proc1   0 18  0  | 19 20 21  |  0  0
4239             0  0  0  | 22 23  0  | 24  0
4240     -------------------------------------
4241     Proc2  25 26 27  |  0  0 28  | 29  0
4242            30  0  0  | 31 32 33  |  0 34
4243 .ve
4244 
4245    This can be represented as a collection of submatrices as
4246 
4247 .vb
4248       A B C
4249       D E F
4250       G H I
4251 .ve
4252 
4253    Where the submatrices A,B,C are owned by proc0, D,E,F are
4254    owned by proc1, G,H,I are owned by proc2.
4255 
4256    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4257    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4258    The 'M','N' parameters are 8,8, and have the same values on all procs.
4259 
4260    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4261    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4262    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4263    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4264    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4265    matrix, and [DF] as another SeqAIJ matrix.
4266 
4267    When d_nz, o_nz parameters are specified, d_nz storage elements are
4268    allocated for every row of the local diagonal submatrix, and o_nz
4269    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4270    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4271    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4272    In this case, the values of d_nz,o_nz are
4273 .vb
4274      proc0 : dnz = 2, o_nz = 2
4275      proc1 : dnz = 3, o_nz = 2
4276      proc2 : dnz = 1, o_nz = 4
4277 .ve
4278    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4279    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4280    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4281    34 values.
4282 
4283    When d_nnz, o_nnz parameters are specified, the storage is specified
4284    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4285    In the above case the values for d_nnz,o_nnz are
4286 .vb
4287      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4288      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4289      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4290 .ve
4291    Here the space allocated is sum of all the above values i.e 34, and
4292    hence pre-allocation is perfect.
4293 
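   Putting the example together, proc0 could create its share of the matrix with the
   following sketch (the communicator and variable names are illustrative only):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
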
4294    Level: intermediate
4295 
4296 .keywords: matrix, aij, compressed row, sparse, parallel
4297 
4298 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4299           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4300 @*/
4301 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4302 {
4303   PetscErrorCode ierr;
4304   PetscMPIInt    size;
4305 
4306   PetscFunctionBegin;
4307   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4308   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4309   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4310   if (size > 1) {
4311     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4312     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4313   } else {
4314     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4315     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4316   }
4317   PetscFunctionReturn(0);
4318 }
4319 
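/*
   MatMPIAIJGetSeqAIJ - gives access to the two local SeqAIJ blocks of a MATMPIAIJ matrix:
   Ad is the "diagonal" block, Ao is the "off-diagonal" block (stored with a compacted
   column space), and colmap maps Ao's local column numbers to global column numbers.
   Any of the output pointers may be NULL if that piece is not needed.

   A minimal usage sketch (variable names are illustrative only):

     Mat            Ad,Ao;
     const PetscInt *garray;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/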
4320 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4321 {
4322   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4323   PetscBool      flg;
4324   PetscErrorCode ierr;
4325 
4326   PetscFunctionBegin;
4327   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4328   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4329   if (Ad)     *Ad     = a->A;
4330   if (Ao)     *Ao     = a->B;
4331   if (colmap) *colmap = a->garray;
4332   PetscFunctionReturn(0);
4333 }
4334 
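/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the rows of the sequential matrix inmat
   contributed by each process in comm on top of each other to form the parallel matrix
   *outmat; n is the number of local columns of *outmat (or PETSC_DECIDE).  With
   MAT_INITIAL_MATRIX the layout and preallocation of *outmat are computed first (symbolic
   phase); the values are then inserted row by row (numeric phase), which is the only work
   done for MAT_REUSE_MATRIX.
*/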
4335 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4336 {
4337   PetscErrorCode ierr;
4338   PetscInt       m,N,i,rstart,nnz,Ii;
4339   PetscInt       *indx;
4340   PetscScalar    *values;
4341 
4342   PetscFunctionBegin;
4343   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4344   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4345     PetscInt       *dnz,*onz,sum,bs,cbs;
4346 
4347     if (n == PETSC_DECIDE) {
4348       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4349     }
4350     /* Check sum(n) = N */
4351     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4352     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4353 
4354     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4355     rstart -= m;
4356 
4357     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4358     for (i=0; i<m; i++) {
4359       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4360       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4361       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4362     }
4363 
4364     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4365     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4366     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4367     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4368     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4369     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4370     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4371     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4372   }
4373 
4374   /* numeric phase */
4375   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4376   for (i=0; i<m; i++) {
4377     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4378     Ii   = i + rstart;
4379     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4380     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4381   }
4382   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4384   PetscFunctionReturn(0);
4385 }
4386 
4387 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4388 {
4389   PetscErrorCode    ierr;
4390   PetscMPIInt       rank;
4391   PetscInt          m,N,i,rstart,nnz;
4392   size_t            len;
4393   const PetscInt    *indx;
4394   PetscViewer       out;
4395   char              *name;
4396   Mat               B;
4397   const PetscScalar *values;
4398 
4399   PetscFunctionBegin;
4400   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4401   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4402   /* Should this be the type of the diagonal block of A? */
4403   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4404   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4405   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4406   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4407   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4408   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4409   for (i=0; i<m; i++) {
4410     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4411     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4412     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4413   }
4414   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4415   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4416 
4417   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4418   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4419   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4420   sprintf(name,"%s.%d",outfile,rank);
4421   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4422   ierr = PetscFree(name);CHKERRQ(ierr);
4423   ierr = MatView(B,out);CHKERRQ(ierr);
4424   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4425   ierr = MatDestroy(&B);CHKERRQ(ierr);
4426   PetscFunctionReturn(0);
4427 }
4428 
4429 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4430 {
4431   PetscErrorCode      ierr;
4432   Mat_Merge_SeqsToMPI *merge;
4433   PetscContainer      container;
4434 
4435   PetscFunctionBegin;
4436   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4437   if (container) {
4438     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4444     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4445     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4446     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4447     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4448     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4449     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4450     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4451     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4452     ierr = PetscFree(merge);CHKERRQ(ierr);
4453     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4454   }
4455   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4456   PetscFunctionReturn(0);
4457 }
4458 
4459 #include <../src/mat/utils/freespace.h>
4460 #include <petscbt.h>
4461 
4462 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4463 {
4464   PetscErrorCode      ierr;
4465   MPI_Comm            comm;
4466   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4467   PetscMPIInt         size,rank,taga,*len_s;
4468   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4469   PetscInt            proc,m;
4470   PetscInt            **buf_ri,**buf_rj;
4471   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4472   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4473   MPI_Request         *s_waits,*r_waits;
4474   MPI_Status          *status;
4475   MatScalar           *aa=a->a;
4476   MatScalar           **abuf_r,*ba_i;
4477   Mat_Merge_SeqsToMPI *merge;
4478   PetscContainer      container;
4479 
4480   PetscFunctionBegin;
4481   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4482   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4483 
4484   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4485   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4486 
4487   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4488   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4489 
4490   bi     = merge->bi;
4491   bj     = merge->bj;
4492   buf_ri = merge->buf_ri;
4493   buf_rj = merge->buf_rj;
4494 
4495   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4496   owners = merge->rowmap->range;
4497   len_s  = merge->len_s;
4498 
4499   /* send and recv matrix values */
4500   /*-----------------------------*/
4501   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4502   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4503 
4504   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4505   for (proc=0,k=0; proc<size; proc++) {
4506     if (!len_s[proc]) continue;
4507     i    = owners[proc];
4508     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4509     k++;
4510   }
4511 
4512   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4513   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4514   ierr = PetscFree(status);CHKERRQ(ierr);
4515 
4516   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4517   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4518 
4519   /* insert mat values of mpimat */
4520   /*----------------------------*/
4521   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4522   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4523 
4524   for (k=0; k<merge->nrecv; k++) {
4525     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4526     nrows       = *(buf_ri_k[k]);
4527     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4528     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4529   }
4530 
4531   /* set values of ba */
4532   m = merge->rowmap->n;
4533   for (i=0; i<m; i++) {
4534     arow = owners[rank] + i;
4535     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4536     bnzi = bi[i+1] - bi[i];
4537     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4538 
4539     /* add local non-zero vals of this proc's seqmat into ba */
4540     anzi   = ai[arow+1] - ai[arow];
4541     aj     = a->j + ai[arow];
4542     aa     = a->a + ai[arow];
4543     nextaj = 0;
4544     for (j=0; nextaj<anzi; j++) {
4545       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4546         ba_i[j] += aa[nextaj++];
4547       }
4548     }
4549 
4550     /* add received vals into ba */
4551     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4552       /* i-th row */
4553       if (i == *nextrow[k]) {
4554         anzi   = *(nextai[k]+1) - *nextai[k];
4555         aj     = buf_rj[k] + *(nextai[k]);
4556         aa     = abuf_r[k] + *(nextai[k]);
4557         nextaj = 0;
4558         for (j=0; nextaj<anzi; j++) {
4559           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4560             ba_i[j] += aa[nextaj++];
4561           }
4562         }
4563         nextrow[k]++; nextai[k]++;
4564       }
4565     }
4566     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4567   }
4568   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4569   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4570 
4571   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4572   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4573   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4574   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4575   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4576   PetscFunctionReturn(0);
4577 }
4578 
4579 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4580 {
4581   PetscErrorCode      ierr;
4582   Mat                 B_mpi;
4583   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4584   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4585   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4586   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4587   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4588   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4589   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4590   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4591   MPI_Status          *status;
4592   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4593   PetscBT             lnkbt;
4594   Mat_Merge_SeqsToMPI *merge;
4595   PetscContainer      container;
4596 
4597   PetscFunctionBegin;
4598   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4599 
4600   /* make sure it is a PETSc comm */
4601   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4602   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4603   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4604 
4605   ierr = PetscNew(&merge);CHKERRQ(ierr);
4606   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4607 
4608   /* determine row ownership */
4609   /*---------------------------------------------------------*/
4610   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4611   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4612   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4613   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4614   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4615   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4616   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4617 
4618   m      = merge->rowmap->n;
4619   owners = merge->rowmap->range;
4620 
4621   /* determine the number of messages to send, their lengths */
4622   /*---------------------------------------------------------*/
4623   len_s = merge->len_s;
4624 
4625   len          = 0; /* length of buf_si[] */
4626   merge->nsend = 0;
4627   for (proc=0; proc<size; proc++) {
4628     len_si[proc] = 0;
4629     if (proc == rank) {
4630       len_s[proc] = 0;
4631     } else {
4632       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4633       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4634     }
4635     if (len_s[proc]) {
4636       merge->nsend++;
4637       nrows = 0;
4638       for (i=owners[proc]; i<owners[proc+1]; i++) {
4639         if (ai[i+1] > ai[i]) nrows++;
4640       }
4641       len_si[proc] = 2*(nrows+1);
4642       len         += len_si[proc];
4643     }
4644   }
4645 
4646   /* determine the number and length of messages to receive for ij-structure */
4647   /*-------------------------------------------------------------------------*/
4648   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4649   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4650 
4651   /* post the Irecv of j-structure */
4652   /*-------------------------------*/
4653   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4654   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4655 
4656   /* post the Isend of j-structure */
4657   /*--------------------------------*/
4658   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4659 
4660   for (proc=0, k=0; proc<size; proc++) {
4661     if (!len_s[proc]) continue;
4662     i    = owners[proc];
4663     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4664     k++;
4665   }
4666 
4667   /* receives and sends of j-structure are complete */
4668   /*------------------------------------------------*/
4669   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4670   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4671 
4672   /* send and recv i-structure */
4673   /*---------------------------*/
4674   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4675   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4676 
4677   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4678   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4679   for (proc=0,k=0; proc<size; proc++) {
4680     if (!len_s[proc]) continue;
4681     /* form outgoing message for i-structure:
4682          buf_si[0]:                 nrows to be sent
4683                [1:nrows]:           row index (global)
4684                [nrows+1:2*nrows+1]: i-structure index
4685     */
4686     /*-------------------------------------------*/
4687     nrows       = len_si[proc]/2 - 1;
4688     buf_si_i    = buf_si + nrows+1;
4689     buf_si[0]   = nrows;
4690     buf_si_i[0] = 0;
4691     nrows       = 0;
4692     for (i=owners[proc]; i<owners[proc+1]; i++) {
4693       anzi = ai[i+1] - ai[i];
4694       if (anzi) {
4695         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4696         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4697         nrows++;
4698       }
4699     }
4700     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4701     k++;
4702     buf_si += len_si[proc];
4703   }
4704 
4705   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4706   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4707 
4708   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4709   for (i=0; i<merge->nrecv; i++) {
4710     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4711   }
4712 
4713   ierr = PetscFree(len_si);CHKERRQ(ierr);
4714   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4715   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4716   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4717   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4718   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4719   ierr = PetscFree(status);CHKERRQ(ierr);
4720 
4721   /* compute a local seq matrix in each processor */
4722   /*----------------------------------------------*/
4723   /* allocate bi array and free space for accumulating nonzero column info */
4724   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4725   bi[0] = 0;
4726 
4727   /* create and initialize a linked list */
4728   nlnk = N+1;
4729   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4730 
4731   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4732   len  = ai[owners[rank+1]] - ai[owners[rank]];
4733   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4734 
4735   current_space = free_space;
4736 
4737   /* determine symbolic info for each local row */
4738   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4739 
4740   for (k=0; k<merge->nrecv; k++) {
4741     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4742     nrows       = *buf_ri_k[k];
4743     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4744     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4745   }
4746 
4747   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4748   len  = 0;
4749   for (i=0; i<m; i++) {
4750     bnzi = 0;
4751     /* add local non-zero cols of this proc's seqmat into lnk */
4752     arow  = owners[rank] + i;
4753     anzi  = ai[arow+1] - ai[arow];
4754     aj    = a->j + ai[arow];
4755     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4756     bnzi += nlnk;
4757     /* add received col data into lnk */
4758     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4759       if (i == *nextrow[k]) { /* i-th row */
4760         anzi  = *(nextai[k]+1) - *nextai[k];
4761         aj    = buf_rj[k] + *nextai[k];
4762         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4763         bnzi += nlnk;
4764         nextrow[k]++; nextai[k]++;
4765       }
4766     }
4767     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4768 
4769     /* if free space is not available, make more free space */
4770     if (current_space->local_remaining<bnzi) {
4771       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4772       nspacedouble++;
4773     }
4774     /* copy data into free space, then initialize lnk */
4775     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4776     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4777 
4778     current_space->array           += bnzi;
4779     current_space->local_used      += bnzi;
4780     current_space->local_remaining -= bnzi;
4781 
4782     bi[i+1] = bi[i] + bnzi;
4783   }
4784 
4785   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4786 
4787   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4788   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4789   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4790 
4791   /* create symbolic parallel matrix B_mpi */
4792   /*---------------------------------------*/
4793   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4794   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4795   if (n==PETSC_DECIDE) {
4796     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4797   } else {
4798     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4799   }
4800   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4801   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4802   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4803   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4804   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4805 
4806   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4807   B_mpi->assembled    = PETSC_FALSE;
4808   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4809   merge->bi           = bi;
4810   merge->bj           = bj;
4811   merge->buf_ri       = buf_ri;
4812   merge->buf_rj       = buf_rj;
4813   merge->coi          = NULL;
4814   merge->coj          = NULL;
4815   merge->owners_co    = NULL;
4816 
4817   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4818 
4819   /* attach the supporting struct to B_mpi for reuse */
4820   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4821   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4822   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4823   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4824   *mpimat = B_mpi;
4825 
4826   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4827   PetscFunctionReturn(0);
4828 }
4829 
4830 /*@C
4831       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4832                  matrices from each processor
4833 
4834     Collective on MPI_Comm
4835 
4836    Input Parameters:
4837 +    comm - the communicator the parallel matrix will live on
4838 .    seqmat - the input sequential matrix on each process
4839 .    m - number of local rows (or PETSC_DECIDE)
4840 .    n - number of local columns (or PETSC_DECIDE)
4841 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4842 
4843    Output Parameter:
4844 .    mpimat - the parallel matrix generated
4845 
4846     Level: advanced
4847 
4848    Notes:
4849      The dimensions of the sequential matrix in each processor MUST be the same.
4850      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4851      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
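
     A typical call sequence is the following sketch (seqmat must have identical dimensions
     and, for the reuse call, an unchanged nonzero pattern on every process):

.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... change the numerical values of seqmat ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve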
4852 @*/
4853 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4854 {
4855   PetscErrorCode ierr;
4856   PetscMPIInt    size;
4857 
4858   PetscFunctionBegin;
4859   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4860   if (size == 1) {
4861     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4862     if (scall == MAT_INITIAL_MATRIX) {
4863       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4864     } else {
4865       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4866     }
4867     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4868     PetscFunctionReturn(0);
4869   }
4870   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4871   if (scall == MAT_INITIAL_MATRIX) {
4872     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4873   }
4874   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4875   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4876   PetscFunctionReturn(0);
4877 }
4878 
4879 /*@
4880      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4881           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4882           with MatGetSize().
4883 
4884     Not Collective
4885 
4886    Input Parameters:
4887 +    A - the matrix
4888 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4889 
4890    Output Parameter:
4891 .    A_loc - the local sequential matrix generated
4892 
4893     Level: developer
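    Notes:
     A minimal usage sketch (here A_loc is destroyed by the caller once it is no longer needed):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
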
4894 
4895 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4896 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4897 @*/
4898 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4899 {
4900   PetscErrorCode ierr;
4901   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4902   Mat_SeqAIJ     *mat,*a,*b;
4903   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4904   MatScalar      *aa,*ba,*cam;
4905   PetscScalar    *ca;
4906   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4907   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4908   PetscBool      match;
4909   MPI_Comm       comm;
4910   PetscMPIInt    size;
4911 
4912   PetscFunctionBegin;
4913   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4914   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4915   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4916   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4917   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4918 
4919   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4920   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4921   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4922   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4923   aa = a->a; ba = b->a;
4924   if (scall == MAT_INITIAL_MATRIX) {
4925     if (size == 1) {
4926       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4927       PetscFunctionReturn(0);
4928     }
4929 
4930     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4931     ci[0] = 0;
4932     for (i=0; i<am; i++) {
4933       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4934     }
4935     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4936     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4937     k    = 0;
4938     for (i=0; i<am; i++) {
4939       ncols_o = bi[i+1] - bi[i];
4940       ncols_d = ai[i+1] - ai[i];
4941       /* off-diagonal portion of A */
4942       for (jo=0; jo<ncols_o; jo++) {
4943         col = cmap[*bj];
4944         if (col >= cstart) break;
4945         cj[k]   = col; bj++;
4946         ca[k++] = *ba++;
4947       }
4948       /* diagonal portion of A */
4949       for (j=0; j<ncols_d; j++) {
4950         cj[k]   = cstart + *aj++;
4951         ca[k++] = *aa++;
4952       }
4953       /* off-diagonal portion of A */
4954       for (j=jo; j<ncols_o; j++) {
4955         cj[k]   = cmap[*bj++];
4956         ca[k++] = *ba++;
4957       }
4958     }
4959     /* put together the new matrix */
4960     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4961     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4962     /* Since these are PETSc arrays, change flags to free them as necessary. */
4963     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4964     mat->free_a  = PETSC_TRUE;
4965     mat->free_ij = PETSC_TRUE;
4966     mat->nonew   = 0;
4967   } else if (scall == MAT_REUSE_MATRIX) {
4968     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4969     ci = mat->i; cj = mat->j; cam = mat->a;
4970     for (i=0; i<am; i++) {
4971       /* off-diagonal portion of A */
4972       ncols_o = bi[i+1] - bi[i];
4973       for (jo=0; jo<ncols_o; jo++) {
4974         col = cmap[*bj];
4975         if (col >= cstart) break;
4976         *cam++ = *ba++; bj++;
4977       }
4978       /* diagonal portion of A */
4979       ncols_d = ai[i+1] - ai[i];
4980       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4981       /* off-diagonal portion of A */
4982       for (j=jo; j<ncols_o; j++) {
4983         *cam++ = *ba++; bj++;
4984       }
4985     }
4986   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4987   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4988   PetscFunctionReturn(0);
4989 }
4990 
4991 /*@C
4992      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4993 
4994     Not Collective
4995 
4996    Input Parameters:
4997 +    A - the matrix
4998 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4999 -    row, col - index sets of rows and columns to extract (or NULL)
5000 
5001    Output Parameter:
5002 .    A_loc - the local sequential matrix generated
5003 
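    Notes:
     A minimal usage sketch (passing NULL for row and col extracts all local rows and all
     nonzero columns of A):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
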
5004     Level: developer
5005 
5006 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5007 
5008 @*/
5009 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5010 {
5011   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5012   PetscErrorCode ierr;
5013   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5014   IS             isrowa,iscola;
5015   Mat            *aloc;
5016   PetscBool      match;
5017 
5018   PetscFunctionBegin;
5019   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5020   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5021   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5022   if (!row) {
5023     start = A->rmap->rstart; end = A->rmap->rend;
5024     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5025   } else {
5026     isrowa = *row;
5027   }
5028   if (!col) {
5029     start = A->cmap->rstart;
5030     cmap  = a->garray;
5031     nzA   = a->A->cmap->n;
5032     nzB   = a->B->cmap->n;
5033     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5034     ncols = 0;
5035     for (i=0; i<nzB; i++) {
5036       if (cmap[i] < start) idx[ncols++] = cmap[i];
5037       else break;
5038     }
5039     imark = i;
5040     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5041     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5042     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5043   } else {
5044     iscola = *col;
5045   }
5046   if (scall != MAT_INITIAL_MATRIX) {
5047     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5048     aloc[0] = *A_loc;
5049   }
5050   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5051   *A_loc = aloc[0];
5052   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5053   if (!row) {
5054     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5055   }
5056   if (!col) {
5057     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5058   }
5059   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5060   PetscFunctionReturn(0);
5061 }
5062 
5063 /*@C
5064     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5065 
5066     Collective on Mat
5067 
5068    Input Parameters:
5069 +    A,B - the matrices in mpiaij format
5070 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5071 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5072 
5073    Output Parameter:
5074 +    rowb, colb - index sets of rows and columns of B to extract
5075 -    B_seq - the sequential matrix generated
5076 
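   Example usage:
     A minimal sketch, with error checking omitted; the index sets created on the first call are
     returned through rowb and colb, must be reused on later calls, and are destroyed by the caller:
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     ... change numerical values of B ...
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
     ISDestroy(&rowb);
     ISDestroy(&colb);
     MatDestroy(&B_seq);
.ve
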
5077     Level: developer
5078 
5079 @*/
5080 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5081 {
5082   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5083   PetscErrorCode ierr;
5084   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5085   IS             isrowb,iscolb;
5086   Mat            *bseq=NULL;
5087 
5088   PetscFunctionBegin;
5089   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5090     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5091   }
5092   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5093 
5094   if (scall == MAT_INITIAL_MATRIX) {
5095     start = A->cmap->rstart;
5096     cmap  = a->garray;
5097     nzA   = a->A->cmap->n;
5098     nzB   = a->B->cmap->n;
5099     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5100     ncols = 0;
5101     for (i=0; i<nzB; i++) {  /* global columns of A (= rows of B) below the local row range */
5102       if (cmap[i] < start) idx[ncols++] = cmap[i];
5103       else break;
5104     }
5105     imark = i;
5106     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5107     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global columns of A (= rows of B) above the local row range */
5108     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5109     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5110   } else {
5111     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5112     isrowb  = *rowb; iscolb = *colb;
5113     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5114     bseq[0] = *B_seq;
5115   }
5116   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5117   *B_seq = bseq[0];
5118   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5119   if (!rowb) {
5120     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5121   } else {
5122     *rowb = isrowb;
5123   }
5124   if (!colb) {
5125     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5126   } else {
5127     *colb = iscolb;
5128   }
5129   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5130   PetscFunctionReturn(0);
5131 }
5132 
5133 /*
5134     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5135     of the OFF-DIAGONAL portion of local A
5136 
5137     Collective on Mat
5138 
5139    Input Parameters:
5140 +    A,B - the matrices in mpiaij format
5141 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5142 
5143    Output Parameters:
5144 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5145 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5146 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5147 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5148 
5149     Level: developer
5150 
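   Example usage:
     A minimal sketch of the intended calling sequence, with error checking omitted; the startsj
     and bufa arrays are created on the first call, reused afterwards, and freed by the caller:

       PetscInt  *startsj_s = NULL,*startsj_r = NULL;
       MatScalar *bufa = NULL;
       Mat       B_oth;
       MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
       ... change numerical values of B ...
       MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
       PetscFree2(startsj_s,startsj_r);
       PetscFree(bufa);
       MatDestroy(&B_oth);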
5151 */
5152 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5153 {
5154   VecScatter_MPI_General *gen_to,*gen_from;
5155   PetscErrorCode         ierr;
5156   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5157   Mat_SeqAIJ             *b_oth;
5158   VecScatter             ctx;
5159   MPI_Comm               comm;
5160   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5161   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5162   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5163   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5164   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5165   MPI_Request            *rwaits = NULL,*swaits = NULL;
5166   MPI_Status             *sstatus,rstatus;
5167   PetscMPIInt            jj,size;
5168   VecScatterType         type;
5169   PetscBool              mpi1;
5170 
5171   PetscFunctionBegin;
5172   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5174 
5175   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5176     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5177   }
5178   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5180 
5181   if (size == 1) {
5182     if (startsj_s) *startsj_s = NULL;
5183     if (bufa_ptr)  *bufa_ptr  = NULL;
5184     *B_oth    = NULL;
5185     PetscFunctionReturn(0);
5186   }
5187 
5188   ctx = a->Mvctx;
5189   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5190   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5191   if (!mpi1) {
5192     /* a->Mvctx is not of VecScatter type mpi1, the only type these Mat-Mat ops support,
5193      so create and use a->Mvctx_mpi1 instead */
5194     if (!a->Mvctx_mpi1) {
5195       a->Mvctx_mpi1_flg = PETSC_TRUE;
5196       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5197     }
5198     ctx = a->Mvctx_mpi1;
5199   }
5200   tag = ((PetscObject)ctx)->tag;
5201 
5202   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5203   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5204   nrecvs   = gen_from->n;
5205   nsends   = gen_to->n;
5206 
5207   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5208   srow    = gen_to->indices;    /* local row index to be sent */
5209   sstarts = gen_to->starts;
5210   sprocs  = gen_to->procs;
5211   sstatus = gen_to->sstatus;
5212   sbs     = gen_to->bs;
5213   rstarts = gen_from->starts;
5214   rprocs  = gen_from->procs;
5215   rbs     = gen_from->bs;
5216 
5217   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5218   if (scall == MAT_INITIAL_MATRIX) {
5219     /* i-array */
5220     /*---------*/
5221     /*  post receives */
5222     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5223     for (i=0; i<nrecvs; i++) {
5224       rowlen = rvalues + rstarts[i]*rbs;
5225       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5226       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5227     }
5228 
5229     /* pack the outgoing message */
5230     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5231 
5232     sstartsj[0] = 0;
5233     rstartsj[0] = 0;
5234     len         = 0; /* total length of j or a array to be sent */
5235     k           = 0;
5236     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5237     for (i=0; i<nsends; i++) {
5238       rowlen = svalues + sstarts[i]*sbs;
5239       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5240       for (j=0; j<nrows; j++) {
5241         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5242         for (l=0; l<sbs; l++) {
5243           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5244 
5245           rowlen[j*sbs+l] = ncols;
5246 
5247           len += ncols;
5248           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5249         }
5250         k++;
5251       }
5252       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5253 
5254       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5255     }
5256     /* recvs and sends of i-array are completed */
5257     i = nrecvs;
5258     while (i--) {
5259       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5260     }
5261     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5262     ierr = PetscFree(svalues);CHKERRQ(ierr);
5263 
5264     /* allocate buffers for sending j and a arrays */
5265     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5266     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5267 
5268     /* create i-array of B_oth */
5269     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5270 
5271     b_othi[0] = 0;
5272     len       = 0; /* total length of j or a array to be received */
5273     k         = 0;
5274     for (i=0; i<nrecvs; i++) {
5275       rowlen = rvalues + rstarts[i]*rbs;
5276       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5277       for (j=0; j<nrows; j++) {
5278         b_othi[k+1] = b_othi[k] + rowlen[j];
5279         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5280         k++;
5281       }
5282       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5283     }
5284     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5285 
5286     /* allocate space for j and a arrays of B_oth */
5287     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5288     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5289 
5290     /* j-array */
5291     /*---------*/
5292     /*  post receives of j-array */
5293     for (i=0; i<nrecvs; i++) {
5294       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5295       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5296     }
5297 
5298     /* pack the outgoing message j-array */
5299     k = 0;
5300     for (i=0; i<nsends; i++) {
5301       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5302       bufJ  = bufj+sstartsj[i];
5303       for (j=0; j<nrows; j++) {
5304         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5305         for (ll=0; ll<sbs; ll++) {
5306           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5307           for (l=0; l<ncols; l++) {
5308             *bufJ++ = cols[l];
5309           }
5310           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5311         }
5312       }
5313       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5314     }
5315 
5316     /* recvs and sends of j-array are completed */
5317     i = nrecvs;
5318     while (i--) {
5319       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5320     }
5321     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5322   } else if (scall == MAT_REUSE_MATRIX) {
5323     sstartsj = *startsj_s;
5324     rstartsj = *startsj_r;
5325     bufa     = *bufa_ptr;
5326     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5327     b_otha   = b_oth->a;
5328   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5329 
5330   /* a-array */
5331   /*---------*/
5332   /*  post receives of a-array */
5333   for (i=0; i<nrecvs; i++) {
5334     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5335     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5336   }
5337 
5338   /* pack the outgoing message a-array */
5339   k = 0;
5340   for (i=0; i<nsends; i++) {
5341     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5342     bufA  = bufa+sstartsj[i];
5343     for (j=0; j<nrows; j++) {
5344       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5345       for (ll=0; ll<sbs; ll++) {
5346         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5347         for (l=0; l<ncols; l++) {
5348           *bufA++ = vals[l];
5349         }
5350         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5351       }
5352     }
5353     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5354   }
5355   /* recvs and sends of a-array are completed */
5356   i = nrecvs;
5357   while (i--) {
5358     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5359   }
5360   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5361   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5362 
5363   if (scall == MAT_INITIAL_MATRIX) {
5364     /* put together the new matrix */
5365     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5366 
5367     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5368     /* Since these are PETSc arrays, change flags to free them as necessary. */
5369     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5370     b_oth->free_a  = PETSC_TRUE;
5371     b_oth->free_ij = PETSC_TRUE;
5372     b_oth->nonew   = 0;
5373 
5374     ierr = PetscFree(bufj);CHKERRQ(ierr);
5375     if (!startsj_s || !bufa_ptr) {
5376       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5377       ierr = PetscFree(bufa);CHKERRQ(ierr);
5378     } else {
5379       *startsj_s = sstartsj;
5380       *startsj_r = rstartsj;
5381       *bufa_ptr  = bufa;
5382     }
5383   }
5384   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5385   PetscFunctionReturn(0);
5386 }
5387 
5388 /*@C
5389   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5390 
5391   Not Collective
5392 
5393   Input Parameter:
5394 . A - The matrix in mpiaij format
5395 
5396   Output Parameters:
5397 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5398 . colmap - A map from global column index to local index into lvec
5399 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5400 
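  Example usage:
    A minimal sketch; the returned objects are owned by the matrix and must not be destroyed by the caller:
.vb
     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve
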
5401   Level: developer
5402 
5403 @*/
5404 #if defined(PETSC_USE_CTABLE)
5405 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5406 #else
5407 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5408 #endif
5409 {
5410   Mat_MPIAIJ *a;
5411 
5412   PetscFunctionBegin;
5413   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5414   PetscValidPointer(lvec, 2);
5415   PetscValidPointer(colmap, 3);
5416   PetscValidPointer(multScatter, 4);
5417   a = (Mat_MPIAIJ*) A->data;
5418   if (lvec) *lvec = a->lvec;
5419   if (colmap) *colmap = a->colmap;
5420   if (multScatter) *multScatter = a->Mvctx;
5421   PetscFunctionReturn(0);
5422 }
5423 
5424 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5425 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5426 #if defined(PETSC_HAVE_MKL_SPARSE)
5427 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5428 #endif
5429 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5430 #if defined(PETSC_HAVE_ELEMENTAL)
5431 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5432 #endif
5433 #if defined(PETSC_HAVE_HYPRE)
5434 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5435 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5436 #endif
5437 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5438 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5439 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5440 
5441 /*
5442     Computes C = A*B as (B'*A')', since a direct MPIDense*MPIAIJ product is not available; the transposed product reuses the MPIAIJ*MPIDense multiply
5443 
5444                n                       p                          p
5445         (              )       (              )         (                  )
5446       m (      A       )  *  n (       B      )   =   m (         C        )
5447         (              )       (              )         (                  )
5448 
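    These routines are composed on the MPIAIJ matrix as "MatMatMult_mpidense_mpiaij_C" (see
    MatCreate_MPIAIJ() below) and are normally reached through the generic interface when A is
    MPIDENSE and B is MPIAIJ, e.g. (a sketch, error checking omitted):

       Mat C;
       MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);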
5449 */
5450 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5451 {
5452   PetscErrorCode ierr;
5453   Mat            At,Bt,Ct;
5454 
5455   PetscFunctionBegin;
5456   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5457   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5458   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5459   ierr = MatDestroy(&At);CHKERRQ(ierr);
5460   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5461   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5462   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5463   PetscFunctionReturn(0);
5464 }
5465 
5466 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5467 {
5468   PetscErrorCode ierr;
5469   PetscInt       m=A->rmap->n,n=B->cmap->n;
5470   Mat            Cmat;
5471 
5472   PetscFunctionBegin;
5473   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5474   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5475   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5476   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5477   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5478   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5479   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5480   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5481 
5482   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5483 
5484   *C = Cmat;
5485   PetscFunctionReturn(0);
5486 }
5487 
5488 /* ----------------------------------------------------------------*/
5489 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5490 {
5491   PetscErrorCode ierr;
5492 
5493   PetscFunctionBegin;
5494   if (scall == MAT_INITIAL_MATRIX) {
5495     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5496     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5497     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5498   }
5499   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5500   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5501   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5502   PetscFunctionReturn(0);
5503 }
5504 
5505 /*MC
5506    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5507 
5508    Options Database Keys:
5509 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5510 
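   Example usage:
     A minimal sketch of creating an MPIAIJ matrix explicitly; the sizes m,n,M,N and the
     preallocation values d_nz,o_nz are placeholders to be supplied by the application:
.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
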
5511   Level: beginner
5512 
5513 .seealso: MatCreateAIJ()
5514 M*/
5515 
5516 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5517 {
5518   Mat_MPIAIJ     *b;
5519   PetscErrorCode ierr;
5520   PetscMPIInt    size;
5521 
5522   PetscFunctionBegin;
5523   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5524 
5525   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5526   B->data       = (void*)b;
5527   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5528   B->assembled  = PETSC_FALSE;
5529   B->insertmode = NOT_SET_VALUES;
5530   b->size       = size;
5531 
5532   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5533 
5534   /* build a stash to cache off-processor entries generated during MatSetValues() */
5535   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5536 
5537   b->donotstash  = PETSC_FALSE;
5538   b->colmap      = 0;
5539   b->garray      = 0;
5540   b->roworiented = PETSC_TRUE;
5541 
5542   /* stuff used for matrix vector multiply */
5543   b->lvec  = NULL;
5544   b->Mvctx = NULL;
5545 
5546   /* stuff for MatGetRow() */
5547   b->rowindices   = 0;
5548   b->rowvalues    = 0;
5549   b->getrowactive = PETSC_FALSE;
5550 
5551   /* flexible pointer used in CUSP/CUSPARSE classes */
5552   b->spptr = NULL;
5553 
5554   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5556   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5558   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5559   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5560   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5561   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5563 #if defined(PETSC_HAVE_MKL_SPARSE)
5564   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5565 #endif
5566   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5567   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5568 #if defined(PETSC_HAVE_ELEMENTAL)
5569   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5570 #endif
5571 #if defined(PETSC_HAVE_HYPRE)
5572   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5573 #endif
5574   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5575   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5576   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5577   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5578   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5579 #if defined(PETSC_HAVE_HYPRE)
5580   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5581 #endif
5582   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5583   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5584   PetscFunctionReturn(0);
5585 }
5586 
5587 /*@C
5588      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5589          and "off-diagonal" part of the matrix in CSR format.
5590 
5591    Collective on MPI_Comm
5592 
5593    Input Parameters:
5594 +  comm - MPI communicator
5595 .  m - number of local rows (Cannot be PETSC_DECIDE)
5596 .  n - This value should be the same as the local size used in creating the
5597        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5598        calculated if N is given). For square matrices n is almost always m.
5599 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5600 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5601 .   i - row indices for "diagonal" portion of matrix
5602 .   j - column indices for "diagonal" portion of matrix (local indices within the diagonal block)
5603 .   a - matrix values for "diagonal" portion of matrix
5604 .   oi - row indices for "off-diagonal" portion of matrix
5605 .   oj - column indices for "off-diagonal" portion of matrix (global column indices)
5606 -   oa - matrix values for "off-diagonal" portion of matrix
5607 
5608    Output Parameter:
5609 .   mat - the matrix
5610 
5611    Level: advanced
5612 
5613    Notes:
5614        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5615        must free the arrays once the matrix has been destroyed, and not before.
5616 
5617        The i, j, oi, and oj indices are 0 based
5618 
5619        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5620 
5621        This sets local rows and cannot be used to set off-processor values.
5622 
5623        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5624        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5625        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5626        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5627        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5628        communication if it is known that only local entries will be set.
5629 
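   Example usage:
     A minimal sketch in which each process owns 2 rows and 2 columns, the "diagonal" block holds
     one entry per row, and the "off-diagonal" block is empty; the values are illustrative only and
     the arrays must remain valid until the matrix is destroyed:
.vb
     PetscInt    i[]  = {0,1,2}, j[]  = {0,1};
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,0,0}, oj[] = {0};
     PetscScalar oa[] = {0.0};
     Mat         A;
     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
     ... use A; MatDestroy(&A) must be called before the arrays are freed ...
.ve
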
5630 .keywords: matrix, aij, compressed row, sparse, parallel
5631 
5632 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5633           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5634 @*/
5635 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5636 {
5637   PetscErrorCode ierr;
5638   Mat_MPIAIJ     *maij;
5639 
5640   PetscFunctionBegin;
5641   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5642   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5643   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5644   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5645   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5646   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5647   maij = (Mat_MPIAIJ*) (*mat)->data;
5648 
5649   (*mat)->preallocated = PETSC_TRUE;
5650 
5651   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5652   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5653 
5654   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5655   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5656 
5657   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5658   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5659   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5660   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5661 
5662   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5663   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5664   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5665   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5666   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5667   PetscFunctionReturn(0);
5668 }
5669 
5670 /*
5671     Special version for direct calls from Fortran
5672 */
5673 #include <petsc/private/fortranimpl.h>
5674 
5675 /* Change these macros so they can be used in a void function */
5676 #undef CHKERRQ
5677 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5678 #undef SETERRQ2
5679 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5680 #undef SETERRQ3
5681 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5682 #undef SETERRQ
5683 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5684 
5685 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5686 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5687 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5688 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5689 #else
5690 #endif
5691 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5692 {
5693   Mat            mat  = *mmat;
5694   PetscInt       m    = *mm, n = *mn;
5695   InsertMode     addv = *maddv;
5696   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5697   PetscScalar    value;
5698   PetscErrorCode ierr;
5699 
5700   MatCheckPreallocated(mat,1);
5701   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5702 
5703 #if defined(PETSC_USE_DEBUG)
5704   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5705 #endif
5706   {
5707     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5708     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5709     PetscBool roworiented = aij->roworiented;
5710 
5711     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5712     Mat        A                 = aij->A;
5713     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5714     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5715     MatScalar  *aa               = a->a;
5716     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5717     Mat        B                 = aij->B;
5718     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5719     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5720     MatScalar  *ba               = b->a;
5721 
5722     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5723     PetscInt  nonew = a->nonew;
5724     MatScalar *ap1,*ap2;
5725 
5726     PetscFunctionBegin;
5727     for (i=0; i<m; i++) {
5728       if (im[i] < 0) continue;
5729 #if defined(PETSC_USE_DEBUG)
5730       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5731 #endif
5732       if (im[i] >= rstart && im[i] < rend) {
5733         row      = im[i] - rstart;
5734         lastcol1 = -1;
5735         rp1      = aj + ai[row];
5736         ap1      = aa + ai[row];
5737         rmax1    = aimax[row];
5738         nrow1    = ailen[row];
5739         low1     = 0;
5740         high1    = nrow1;
5741         lastcol2 = -1;
5742         rp2      = bj + bi[row];
5743         ap2      = ba + bi[row];
5744         rmax2    = bimax[row];
5745         nrow2    = bilen[row];
5746         low2     = 0;
5747         high2    = nrow2;
5748 
5749         for (j=0; j<n; j++) {
5750           if (roworiented) value = v[i*n+j];
5751           else value = v[i+j*m];
5752           if (in[j] >= cstart && in[j] < cend) {
5753             col = in[j] - cstart;
5754             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5755             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5756           } else if (in[j] < 0) continue;
5757 #if defined(PETSC_USE_DEBUG)
5758           /* the extra braces around SETERRQ2() are required with --with-errorchecking=0 because of the 'else' clause that follows */
5759           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5760 #endif
5761           else {
5762             if (mat->was_assembled) {
5763               if (!aij->colmap) {
5764                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5765               }
5766 #if defined(PETSC_USE_CTABLE)
5767               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5768               col--;
5769 #else
5770               col = aij->colmap[in[j]] - 1;
5771 #endif
5772               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5773               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5774                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5775                 col  =  in[j];
5776                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5777                 B     = aij->B;
5778                 b     = (Mat_SeqAIJ*)B->data;
5779                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5780                 rp2   = bj + bi[row];
5781                 ap2   = ba + bi[row];
5782                 rmax2 = bimax[row];
5783                 nrow2 = bilen[row];
5784                 low2  = 0;
5785                 high2 = nrow2;
5786                 bm    = aij->B->rmap->n;
5787                 ba    = b->a;
5788               }
5789             } else col = in[j];
5790             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5791           }
5792         }
5793       } else if (!aij->donotstash) {
5794         if (roworiented) {
5795           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5796         } else {
5797           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5798         }
5799       }
5800     }
5801   }
5802   PetscFunctionReturnVoid();
5803 }
5804 
5805