xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision efd4aadf157bf1ba2d80c2be092fcf4247860003)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
22    enough exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine number diagonal and off-diagonal counts */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine number diagonal and off-diagonal counts */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0*/
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it it is not scalable (each processor
403 has an order N integer array but is fast to acess.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselfs, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of off process part of matrix zeroing removed columns*/
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added in yy until the next line, */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1042 {
1043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1044   PetscErrorCode ierr;
1045 
1046   PetscFunctionBegin;
1047   /* do nondiagonal part */
1048   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1049   /* send it on its way */
1050   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   /* do local part */
1052   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1053   /* receive remote parts */
1054   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 /*
1059   This only works correctly for square matrices where the subblock A->A is the
1060    diagonal block
1061 */
1062 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1063 {
1064   PetscErrorCode ierr;
1065   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1066 
1067   PetscFunctionBegin;
1068   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1069   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1070   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1075 {
1076   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1077   PetscErrorCode ierr;
1078 
1079   PetscFunctionBegin;
1080   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1081   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1086 {
1087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091 #if defined(PETSC_USE_LOG)
1092   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1093 #endif
1094   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1095   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1096   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1097   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1098 #if defined(PETSC_USE_CTABLE)
1099   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1100 #else
1101   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1102 #endif
1103   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1105   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1106   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1107   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1108   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1109 
1110   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1111   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1112   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1113   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1114   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1115   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1116   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1117   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1118 #if defined(PETSC_HAVE_ELEMENTAL)
1119   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1120 #endif
1121 #if defined(PETSC_HAVE_HYPRE)
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1124 #endif
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1129 {
1130   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1131   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1132   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1133   PetscErrorCode ierr;
1134   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1135   int            fd;
1136   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1137   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1138   PetscScalar    *column_values;
1139   PetscInt       message_count,flowcontrolcount;
1140   FILE           *file;
1141 
1142   PetscFunctionBegin;
1143   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1144   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1145   nz   = A->nz + B->nz;
1146   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1147   if (!rank) {
1148     header[0] = MAT_FILE_CLASSID;
1149     header[1] = mat->rmap->N;
1150     header[2] = mat->cmap->N;
1151 
1152     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1153     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1154     /* get largest number of rows any processor has */
1155     rlen  = mat->rmap->n;
1156     range = mat->rmap->range;
1157     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1158   } else {
1159     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1160     rlen = mat->rmap->n;
1161   }
1162 
1163   /* load up the local row counts */
1164   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1165   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1166 
1167   /* store the row lengths to the file */
1168   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1169   if (!rank) {
1170     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     for (i=1; i<size; i++) {
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       rlen = range[i+1] - range[i];
1174       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     }
1177     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1178   } else {
1179     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1180     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1182   }
1183   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1184 
1185   /* load up the local column indices */
1186   nzmax = nz; /* th processor needs space a largest processor needs */
1187   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1188   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1189   cnt   = 0;
1190   for (i=0; i<mat->rmap->n; i++) {
1191     for (j=B->i[i]; j<B->i[i+1]; j++) {
1192       if ((col = garray[B->j[j]]) > cstart) break;
1193       column_indices[cnt++] = col;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1196     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1197   }
1198   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1199 
1200   /* store the column indices to the file */
1201   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1202   if (!rank) {
1203     MPI_Status status;
1204     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     for (i=1; i<size; i++) {
1206       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1207       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1208       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1209       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1211     }
1212     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1213   } else {
1214     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1215     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1216     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1218   }
1219   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1220 
1221   /* load up the local column values */
1222   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1223   cnt  = 0;
1224   for (i=0; i<mat->rmap->n; i++) {
1225     for (j=B->i[i]; j<B->i[i+1]; j++) {
1226       if (garray[B->j[j]] > cstart) break;
1227       column_values[cnt++] = B->a[j];
1228     }
1229     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1230     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1231   }
1232   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1233 
1234   /* store the column values to the file */
1235   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1236   if (!rank) {
1237     MPI_Status status;
1238     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1239     for (i=1; i<size; i++) {
1240       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1241       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1242       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1243       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1244       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1245     }
1246     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1247   } else {
1248     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1249     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(column_values);CHKERRQ(ierr);
1254 
1255   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1256   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #include <petscdraw.h>
1261 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1262 {
1263   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1264   PetscErrorCode    ierr;
1265   PetscMPIInt       rank = aij->rank,size = aij->size;
1266   PetscBool         isdraw,iascii,isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1272   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1273   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1274   if (iascii) {
1275     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1276     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1277       MatInfo   info;
1278       PetscBool inodes;
1279 
1280       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1281       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1282       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1283       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1284       if (!inodes) {
1285         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1286                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1287       } else {
1288         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1289                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1290       }
1291       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1292       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1293       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1295       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1296       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1297       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1298       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1299       PetscFunctionReturn(0);
1300     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1301       PetscInt inodecount,inodelimit,*inodes;
1302       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1303       if (inodes) {
1304         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1305       } else {
1306         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1307       }
1308       PetscFunctionReturn(0);
1309     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1310       PetscFunctionReturn(0);
1311     }
1312   } else if (isbinary) {
1313     if (size == 1) {
1314       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1315       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1316     } else {
1317       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1318     }
1319     PetscFunctionReturn(0);
1320   } else if (isdraw) {
1321     PetscDraw draw;
1322     PetscBool isnull;
1323     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1324     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1325     if (isnull) PetscFunctionReturn(0);
1326   }
1327 
1328   {
1329     /* assemble the entire matrix onto first processor. */
1330     Mat        A;
1331     Mat_SeqAIJ *Aloc;
1332     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1333     MatScalar  *a;
1334 
1335     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1336     if (!rank) {
1337       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1338     } else {
1339       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1340     }
1341     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1342     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1343     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1344     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1345     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1346 
1347     /* copy over the A part */
1348     Aloc = (Mat_SeqAIJ*)aij->A->data;
1349     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1350     row  = mat->rmap->rstart;
1351     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1352     for (i=0; i<m; i++) {
1353       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1354       row++;
1355       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1356     }
1357     aj = Aloc->j;
1358     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1359 
1360     /* copy over the B part */
1361     Aloc = (Mat_SeqAIJ*)aij->B->data;
1362     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1363     row  = mat->rmap->rstart;
1364     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1365     ct   = cols;
1366     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1367     for (i=0; i<m; i++) {
1368       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1369       row++;
1370       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1371     }
1372     ierr = PetscFree(ct);CHKERRQ(ierr);
1373     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375     /*
1376        Everyone has to call to draw the matrix since the graphics waits are
1377        synchronized across all processors that share the PetscDraw object
1378     */
1379     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1380     if (!rank) {
1381       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1382       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1383     }
1384     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1385     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1386     ierr = MatDestroy(&A);CHKERRQ(ierr);
1387   }
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1392 {
1393   PetscErrorCode ierr;
1394   PetscBool      iascii,isdraw,issocket,isbinary;
1395 
1396   PetscFunctionBegin;
1397   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1401   if (iascii || isdraw || isbinary || issocket) {
1402     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1408 {
1409   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1410   PetscErrorCode ierr;
1411   Vec            bb1 = 0;
1412   PetscBool      hasop;
1413 
1414   PetscFunctionBegin;
1415   if (flag == SOR_APPLY_UPPER) {
1416     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1417     PetscFunctionReturn(0);
1418   }
1419 
1420   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1421     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1422   }
1423 
1424   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1425     if (flag & SOR_ZERO_INITIAL_GUESS) {
1426       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1427       its--;
1428     }
1429 
1430     while (its--) {
1431       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1432       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1433 
1434       /* update rhs: bb1 = bb - B*x */
1435       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1436       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1437 
1438       /* local sweep */
1439       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1440     }
1441   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1444       its--;
1445     }
1446     while (its--) {
1447       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1448       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1449 
1450       /* update rhs: bb1 = bb - B*x */
1451       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1452       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1453 
1454       /* local sweep */
1455       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1456     }
1457   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1458     if (flag & SOR_ZERO_INITIAL_GUESS) {
1459       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1460       its--;
1461     }
1462     while (its--) {
1463       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1464       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1465 
1466       /* update rhs: bb1 = bb - B*x */
1467       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1468       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1469 
1470       /* local sweep */
1471       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1472     }
1473   } else if (flag & SOR_EISENSTAT) {
1474     Vec xx1;
1475 
1476     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1477     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1478 
1479     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1480     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1481     if (!mat->diag) {
1482       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1483       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1484     }
1485     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1486     if (hasop) {
1487       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1488     } else {
1489       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1490     }
1491     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1492 
1493     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1494 
1495     /* local sweep */
1496     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1497     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1498     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1499   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1500 
1501   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1502 
1503   matin->factorerrortype = mat->A->factorerrortype;
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1508 {
1509   Mat            aA,aB,Aperm;
1510   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1511   PetscScalar    *aa,*ba;
1512   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1513   PetscSF        rowsf,sf;
1514   IS             parcolp = NULL;
1515   PetscBool      done;
1516   PetscErrorCode ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1520   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1521   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1522   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1523 
1524   /* Invert row permutation to find out where my rows should go */
1525   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1526   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1527   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1528   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1529   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1530   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1531 
1532   /* Invert column permutation to find out where my columns should go */
1533   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1534   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1535   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1536   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1537   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1538   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1539   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1540 
1541   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1542   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1543   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1544 
1545   /* Find out where my gcols should go */
1546   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1547   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1548   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1549   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1550   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1551   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1552   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1553   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1554 
1555   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1556   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1557   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1558   for (i=0; i<m; i++) {
1559     PetscInt row = rdest[i],rowner;
1560     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1561     for (j=ai[i]; j<ai[i+1]; j++) {
1562       PetscInt cowner,col = cdest[aj[j]];
1563       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1564       if (rowner == cowner) dnnz[i]++;
1565       else onnz[i]++;
1566     }
1567     for (j=bi[i]; j<bi[i+1]; j++) {
1568       PetscInt cowner,col = gcdest[bj[j]];
1569       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1570       if (rowner == cowner) dnnz[i]++;
1571       else onnz[i]++;
1572     }
1573   }
1574   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1575   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1576   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1577   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1578   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1579 
1580   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1581   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1582   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1583   for (i=0; i<m; i++) {
1584     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1585     PetscInt j0,rowlen;
1586     rowlen = ai[i+1] - ai[i];
1587     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1588       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1589       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1590     }
1591     rowlen = bi[i+1] - bi[i];
1592     for (j0=j=0; j<rowlen; j0=j) {
1593       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1594       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1595     }
1596   }
1597   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1598   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1599   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1600   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1601   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1602   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1603   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1604   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1605   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1606   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1607   *B = Aperm;
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1612 {
1613   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1614   PetscErrorCode ierr;
1615 
1616   PetscFunctionBegin;
1617   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1618   if (ghosts) *ghosts = aij->garray;
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   Mat            A    = mat->A,B = mat->B;
1626   PetscErrorCode ierr;
1627   PetscReal      isend[5],irecv[5];
1628 
1629   PetscFunctionBegin;
1630   info->block_size = 1.0;
1631   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1632 
1633   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1634   isend[3] = info->memory;  isend[4] = info->mallocs;
1635 
1636   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1637 
1638   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1639   isend[3] += info->memory;  isend[4] += info->mallocs;
1640   if (flag == MAT_LOCAL) {
1641     info->nz_used      = isend[0];
1642     info->nz_allocated = isend[1];
1643     info->nz_unneeded  = isend[2];
1644     info->memory       = isend[3];
1645     info->mallocs      = isend[4];
1646   } else if (flag == MAT_GLOBAL_MAX) {
1647     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1648 
1649     info->nz_used      = irecv[0];
1650     info->nz_allocated = irecv[1];
1651     info->nz_unneeded  = irecv[2];
1652     info->memory       = irecv[3];
1653     info->mallocs      = irecv[4];
1654   } else if (flag == MAT_GLOBAL_SUM) {
1655     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1656 
1657     info->nz_used      = irecv[0];
1658     info->nz_allocated = irecv[1];
1659     info->nz_unneeded  = irecv[2];
1660     info->memory       = irecv[3];
1661     info->mallocs      = irecv[4];
1662   }
1663   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1664   info->fill_ratio_needed = 0;
1665   info->factor_mallocs    = 0;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1670 {
1671   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675   switch (op) {
1676   case MAT_NEW_NONZERO_LOCATIONS:
1677   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1678   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1679   case MAT_KEEP_NONZERO_PATTERN:
1680   case MAT_NEW_NONZERO_LOCATION_ERR:
1681   case MAT_USE_INODES:
1682   case MAT_IGNORE_ZERO_ENTRIES:
1683     MatCheckPreallocated(A,1);
1684     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1685     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1686     break;
1687   case MAT_ROW_ORIENTED:
1688     MatCheckPreallocated(A,1);
1689     a->roworiented = flg;
1690 
1691     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1692     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1693     break;
1694   case MAT_NEW_DIAGONALS:
1695     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1696     break;
1697   case MAT_IGNORE_OFF_PROC_ENTRIES:
1698     a->donotstash = flg;
1699     break;
1700   case MAT_SPD:
1701     A->spd_set = PETSC_TRUE;
1702     A->spd     = flg;
1703     if (flg) {
1704       A->symmetric                  = PETSC_TRUE;
1705       A->structurally_symmetric     = PETSC_TRUE;
1706       A->symmetric_set              = PETSC_TRUE;
1707       A->structurally_symmetric_set = PETSC_TRUE;
1708     }
1709     break;
1710   case MAT_SYMMETRIC:
1711     MatCheckPreallocated(A,1);
1712     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1713     break;
1714   case MAT_STRUCTURALLY_SYMMETRIC:
1715     MatCheckPreallocated(A,1);
1716     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1717     break;
1718   case MAT_HERMITIAN:
1719     MatCheckPreallocated(A,1);
1720     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1721     break;
1722   case MAT_SYMMETRY_ETERNAL:
1723     MatCheckPreallocated(A,1);
1724     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1725     break;
1726   case MAT_SUBMAT_SINGLEIS:
1727     A->submat_singleis = flg;
1728     break;
1729   case MAT_STRUCTURE_ONLY:
1730     /* The option is handled directly by MatSetOption() */
1731     break;
1732   default:
1733     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1739 {
1740   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1741   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1742   PetscErrorCode ierr;
1743   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1744   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1745   PetscInt       *cmap,*idx_p;
1746 
1747   PetscFunctionBegin;
1748   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1749   mat->getrowactive = PETSC_TRUE;
1750 
1751   if (!mat->rowvalues && (idx || v)) {
1752     /*
1753         allocate enough space to hold information from the longest row.
1754     */
1755     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1756     PetscInt   max = 1,tmp;
1757     for (i=0; i<matin->rmap->n; i++) {
1758       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1759       if (max < tmp) max = tmp;
1760     }
1761     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1762   }
1763 
1764   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1765   lrow = row - rstart;
1766 
1767   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1768   if (!v)   {pvA = 0; pvB = 0;}
1769   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1770   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1771   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1772   nztot = nzA + nzB;
1773 
1774   cmap = mat->garray;
1775   if (v  || idx) {
1776     if (nztot) {
1777       /* Sort by increasing column numbers, assuming A and B already sorted */
1778       PetscInt imark = -1;
1779       if (v) {
1780         *v = v_p = mat->rowvalues;
1781         for (i=0; i<nzB; i++) {
1782           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1783           else break;
1784         }
1785         imark = i;
1786         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1787         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1788       }
1789       if (idx) {
1790         *idx = idx_p = mat->rowindices;
1791         if (imark > -1) {
1792           for (i=0; i<imark; i++) {
1793             idx_p[i] = cmap[cworkB[i]];
1794           }
1795         } else {
1796           for (i=0; i<nzB; i++) {
1797             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1798             else break;
1799           }
1800           imark = i;
1801         }
1802         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1803         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1804       }
1805     } else {
1806       if (idx) *idx = 0;
1807       if (v)   *v   = 0;
1808     }
1809   }
1810   *nz  = nztot;
1811   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1812   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1817 {
1818   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1819 
1820   PetscFunctionBegin;
1821   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1822   aij->getrowactive = PETSC_FALSE;
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1827 {
1828   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1829   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1830   PetscErrorCode ierr;
1831   PetscInt       i,j,cstart = mat->cmap->rstart;
1832   PetscReal      sum = 0.0;
1833   MatScalar      *v;
1834 
1835   PetscFunctionBegin;
1836   if (aij->size == 1) {
1837     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1838   } else {
1839     if (type == NORM_FROBENIUS) {
1840       v = amat->a;
1841       for (i=0; i<amat->nz; i++) {
1842         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1843       }
1844       v = bmat->a;
1845       for (i=0; i<bmat->nz; i++) {
1846         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1847       }
1848       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1849       *norm = PetscSqrtReal(*norm);
1850       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1851     } else if (type == NORM_1) { /* max column norm */
1852       PetscReal *tmp,*tmp2;
1853       PetscInt  *jj,*garray = aij->garray;
1854       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1855       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1856       *norm = 0.0;
1857       v     = amat->a; jj = amat->j;
1858       for (j=0; j<amat->nz; j++) {
1859         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1860       }
1861       v = bmat->a; jj = bmat->j;
1862       for (j=0; j<bmat->nz; j++) {
1863         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1864       }
1865       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1866       for (j=0; j<mat->cmap->N; j++) {
1867         if (tmp2[j] > *norm) *norm = tmp2[j];
1868       }
1869       ierr = PetscFree(tmp);CHKERRQ(ierr);
1870       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1871       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1872     } else if (type == NORM_INFINITY) { /* max row norm */
1873       PetscReal ntemp = 0.0;
1874       for (j=0; j<aij->A->rmap->n; j++) {
1875         v   = amat->a + amat->i[j];
1876         sum = 0.0;
1877         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1878           sum += PetscAbsScalar(*v); v++;
1879         }
1880         v = bmat->a + bmat->i[j];
1881         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1882           sum += PetscAbsScalar(*v); v++;
1883         }
1884         if (sum > ntemp) ntemp = sum;
1885       }
1886       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1887       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1888     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1894 {
1895   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1896   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1897   PetscErrorCode ierr;
1898   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1899   PetscInt       cstart = A->cmap->rstart,ncol;
1900   Mat            B;
1901   MatScalar      *array;
1902 
1903   PetscFunctionBegin;
1904   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1905 
1906   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1907   ai = Aloc->i; aj = Aloc->j;
1908   bi = Bloc->i; bj = Bloc->j;
1909   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1910     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1911     PetscSFNode          *oloc;
1912     PETSC_UNUSED PetscSF sf;
1913 
1914     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1915     /* compute d_nnz for preallocation */
1916     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1917     for (i=0; i<ai[ma]; i++) {
1918       d_nnz[aj[i]]++;
1919       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1920     }
1921     /* compute local off-diagonal contributions */
1922     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1923     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1924     /* map those to global */
1925     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1926     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1927     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1928     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1929     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1930     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1931     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1932 
1933     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1934     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1935     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1936     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1937     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1938     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1939   } else {
1940     B    = *matout;
1941     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1942     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1943   }
1944 
1945   /* copy over the A part */
1946   array = Aloc->a;
1947   row   = A->rmap->rstart;
1948   for (i=0; i<ma; i++) {
1949     ncol = ai[i+1]-ai[i];
1950     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1951     row++;
1952     array += ncol; aj += ncol;
1953   }
1954   aj = Aloc->j;
1955   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1956 
1957   /* copy over the B part */
1958   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1959   array = Bloc->a;
1960   row   = A->rmap->rstart;
1961   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1962   cols_tmp = cols;
1963   for (i=0; i<mb; i++) {
1964     ncol = bi[i+1]-bi[i];
1965     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1966     row++;
1967     array += ncol; cols_tmp += ncol;
1968   }
1969   ierr = PetscFree(cols);CHKERRQ(ierr);
1970 
1971   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1972   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1973   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1974     *matout = B;
1975   } else {
1976     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1977   }
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1982 {
1983   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1984   Mat            a    = aij->A,b = aij->B;
1985   PetscErrorCode ierr;
1986   PetscInt       s1,s2,s3;
1987 
1988   PetscFunctionBegin;
1989   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1990   if (rr) {
1991     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1992     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1993     /* Overlap communication with computation. */
1994     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1995   }
1996   if (ll) {
1997     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1998     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1999     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2000   }
2001   /* scale  the diagonal block */
2002   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2003 
2004   if (rr) {
2005     /* Do a scatter end and then right scale the off-diagonal block */
2006     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2007     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2013 {
2014   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2023 {
2024   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2025   Mat            a,b,c,d;
2026   PetscBool      flg;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   a = matA->A; b = matA->B;
2031   c = matB->A; d = matB->B;
2032 
2033   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2034   if (flg) {
2035     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2036   }
2037   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2042 {
2043   PetscErrorCode ierr;
2044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2045   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2046 
2047   PetscFunctionBegin;
2048   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2049   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2050     /* because of the column compression in the off-processor part of the matrix a->B,
2051        the number of columns in a->B and b->B may be different, hence we cannot call
2052        the MatCopy() directly on the two parts. If need be, we can provide a more
2053        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2054        then copying the submatrices */
2055     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2056   } else {
2057     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2058     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2059   }
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2064 {
2065   PetscErrorCode ierr;
2066 
2067   PetscFunctionBegin;
2068   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 /*
2073    Computes the number of nonzeros per row needed for preallocation when X and Y
2074    have different nonzero structure.
2075 */
2076 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2077 {
2078   PetscInt       i,j,k,nzx,nzy;
2079 
2080   PetscFunctionBegin;
2081   /* Set the number of nonzeros in the new matrix */
2082   for (i=0; i<m; i++) {
2083     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2084     nzx = xi[i+1] - xi[i];
2085     nzy = yi[i+1] - yi[i];
2086     nnz[i] = 0;
2087     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2088       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2089       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2090       nnz[i]++;
2091     }
2092     for (; k<nzy; k++) nnz[i]++;
2093   }
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2098 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2099 {
2100   PetscErrorCode ierr;
2101   PetscInt       m = Y->rmap->N;
2102   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2103   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2104 
2105   PetscFunctionBegin;
2106   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2111 {
2112   PetscErrorCode ierr;
2113   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2114   PetscBLASInt   bnz,one=1;
2115   Mat_SeqAIJ     *x,*y;
2116 
2117   PetscFunctionBegin;
2118   if (str == SAME_NONZERO_PATTERN) {
2119     PetscScalar alpha = a;
2120     x    = (Mat_SeqAIJ*)xx->A->data;
2121     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2122     y    = (Mat_SeqAIJ*)yy->A->data;
2123     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2124     x    = (Mat_SeqAIJ*)xx->B->data;
2125     y    = (Mat_SeqAIJ*)yy->B->data;
2126     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2127     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2128     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2129   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2130     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2131   } else {
2132     Mat      B;
2133     PetscInt *nnz_d,*nnz_o;
2134     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2135     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2136     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2137     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2138     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2139     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2140     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2141     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2142     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2143     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2144     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2145     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2146     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2147     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2148   }
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2153 
2154 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2155 {
2156 #if defined(PETSC_USE_COMPLEX)
2157   PetscErrorCode ierr;
2158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2159 
2160   PetscFunctionBegin;
2161   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2162   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2163 #else
2164   PetscFunctionBegin;
2165 #endif
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2170 {
2171   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2176   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2181 {
2182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2187   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2192 {
2193   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2194   PetscErrorCode ierr;
2195   PetscInt       i,*idxb = 0;
2196   PetscScalar    *va,*vb;
2197   Vec            vtmp;
2198 
2199   PetscFunctionBegin;
2200   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2201   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2202   if (idx) {
2203     for (i=0; i<A->rmap->n; i++) {
2204       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2205     }
2206   }
2207 
2208   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2209   if (idx) {
2210     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2211   }
2212   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2213   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2214 
2215   for (i=0; i<A->rmap->n; i++) {
2216     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2217       va[i] = vb[i];
2218       if (idx) idx[i] = a->garray[idxb[i]];
2219     }
2220   }
2221 
2222   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2223   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2224   ierr = PetscFree(idxb);CHKERRQ(ierr);
2225   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2230 {
2231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2232   PetscErrorCode ierr;
2233   PetscInt       i,*idxb = 0;
2234   PetscScalar    *va,*vb;
2235   Vec            vtmp;
2236 
2237   PetscFunctionBegin;
2238   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2239   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2240   if (idx) {
2241     for (i=0; i<A->cmap->n; i++) {
2242       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2243     }
2244   }
2245 
2246   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2247   if (idx) {
2248     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2249   }
2250   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2251   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2252 
2253   for (i=0; i<A->rmap->n; i++) {
2254     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2255       va[i] = vb[i];
2256       if (idx) idx[i] = a->garray[idxb[i]];
2257     }
2258   }
2259 
2260   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2261   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2262   ierr = PetscFree(idxb);CHKERRQ(ierr);
2263   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2268 {
2269   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2270   PetscInt       n      = A->rmap->n;
2271   PetscInt       cstart = A->cmap->rstart;
2272   PetscInt       *cmap  = mat->garray;
2273   PetscInt       *diagIdx, *offdiagIdx;
2274   Vec            diagV, offdiagV;
2275   PetscScalar    *a, *diagA, *offdiagA;
2276   PetscInt       r;
2277   PetscErrorCode ierr;
2278 
2279   PetscFunctionBegin;
2280   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2281   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2282   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2283   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2284   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2285   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2286   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2287   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2288   for (r = 0; r < n; ++r) {
2289     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2290       a[r]   = diagA[r];
2291       idx[r] = cstart + diagIdx[r];
2292     } else {
2293       a[r]   = offdiagA[r];
2294       idx[r] = cmap[offdiagIdx[r]];
2295     }
2296   }
2297   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2298   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2299   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2300   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2301   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2302   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2307 {
2308   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2309   PetscInt       n      = A->rmap->n;
2310   PetscInt       cstart = A->cmap->rstart;
2311   PetscInt       *cmap  = mat->garray;
2312   PetscInt       *diagIdx, *offdiagIdx;
2313   Vec            diagV, offdiagV;
2314   PetscScalar    *a, *diagA, *offdiagA;
2315   PetscInt       r;
2316   PetscErrorCode ierr;
2317 
2318   PetscFunctionBegin;
2319   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2320   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2321   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2322   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2323   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2324   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2325   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2326   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2327   for (r = 0; r < n; ++r) {
2328     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2329       a[r]   = diagA[r];
2330       idx[r] = cstart + diagIdx[r];
2331     } else {
2332       a[r]   = offdiagA[r];
2333       idx[r] = cmap[offdiagIdx[r]];
2334     }
2335   }
2336   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2337   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2338   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2339   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2340   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2341   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2346 {
2347   PetscErrorCode ierr;
2348   Mat            *dummy;
2349 
2350   PetscFunctionBegin;
2351   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2352   *newmat = *dummy;
2353   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2358 {
2359   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2360   PetscErrorCode ierr;
2361 
2362   PetscFunctionBegin;
2363   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2364   A->factorerrortype = a->A->factorerrortype;
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2369 {
2370   PetscErrorCode ierr;
2371   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2372 
2373   PetscFunctionBegin;
2374   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2375   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2376   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2377   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2382 {
2383   PetscFunctionBegin;
2384   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2385   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 /*@
2390    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2391 
2392    Collective on Mat
2393 
2394    Input Parameters:
2395 +    A - the matrix
2396 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2397 
2398  Level: advanced
2399 
2400 @*/
2401 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2402 {
2403   PetscErrorCode       ierr;
2404 
2405   PetscFunctionBegin;
2406   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2411 {
2412   PetscErrorCode       ierr;
2413   PetscBool            sc = PETSC_FALSE,flg;
2414 
2415   PetscFunctionBegin;
2416   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2417   ierr = PetscObjectOptionsBegin((PetscObject)A);
2418     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2419     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2420     if (flg) {
2421       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2422     }
2423   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2424   PetscFunctionReturn(0);
2425 }
2426 
2427 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2428 {
2429   PetscErrorCode ierr;
2430   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2431   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2432 
2433   PetscFunctionBegin;
2434   if (!Y->preallocated) {
2435     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2436   } else if (!aij->nz) {
2437     PetscInt nonew = aij->nonew;
2438     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2439     aij->nonew = nonew;
2440   }
2441   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449 
2450   PetscFunctionBegin;
2451   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2452   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2453   if (d) {
2454     PetscInt rstart;
2455     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2456     *d += rstart;
2457 
2458   }
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 
2463 /* -------------------------------------------------------------------*/
2464 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2465                                        MatGetRow_MPIAIJ,
2466                                        MatRestoreRow_MPIAIJ,
2467                                        MatMult_MPIAIJ,
2468                                 /* 4*/ MatMultAdd_MPIAIJ,
2469                                        MatMultTranspose_MPIAIJ,
2470                                        MatMultTransposeAdd_MPIAIJ,
2471                                        0,
2472                                        0,
2473                                        0,
2474                                 /*10*/ 0,
2475                                        0,
2476                                        0,
2477                                        MatSOR_MPIAIJ,
2478                                        MatTranspose_MPIAIJ,
2479                                 /*15*/ MatGetInfo_MPIAIJ,
2480                                        MatEqual_MPIAIJ,
2481                                        MatGetDiagonal_MPIAIJ,
2482                                        MatDiagonalScale_MPIAIJ,
2483                                        MatNorm_MPIAIJ,
2484                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2485                                        MatAssemblyEnd_MPIAIJ,
2486                                        MatSetOption_MPIAIJ,
2487                                        MatZeroEntries_MPIAIJ,
2488                                 /*24*/ MatZeroRows_MPIAIJ,
2489                                        0,
2490                                        0,
2491                                        0,
2492                                        0,
2493                                 /*29*/ MatSetUp_MPIAIJ,
2494                                        0,
2495                                        0,
2496                                        MatGetDiagonalBlock_MPIAIJ,
2497                                        0,
2498                                 /*34*/ MatDuplicate_MPIAIJ,
2499                                        0,
2500                                        0,
2501                                        0,
2502                                        0,
2503                                 /*39*/ MatAXPY_MPIAIJ,
2504                                        MatCreateSubMatrices_MPIAIJ,
2505                                        MatIncreaseOverlap_MPIAIJ,
2506                                        MatGetValues_MPIAIJ,
2507                                        MatCopy_MPIAIJ,
2508                                 /*44*/ MatGetRowMax_MPIAIJ,
2509                                        MatScale_MPIAIJ,
2510                                        MatShift_MPIAIJ,
2511                                        MatDiagonalSet_MPIAIJ,
2512                                        MatZeroRowsColumns_MPIAIJ,
2513                                 /*49*/ MatSetRandom_MPIAIJ,
2514                                        0,
2515                                        0,
2516                                        0,
2517                                        0,
2518                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2519                                        0,
2520                                        MatSetUnfactored_MPIAIJ,
2521                                        MatPermute_MPIAIJ,
2522                                        0,
2523                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2524                                        MatDestroy_MPIAIJ,
2525                                        MatView_MPIAIJ,
2526                                        0,
2527                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2528                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2529                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2530                                        0,
2531                                        0,
2532                                        0,
2533                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2534                                        MatGetRowMinAbs_MPIAIJ,
2535                                        0,
2536                                        0,
2537                                        0,
2538                                        0,
2539                                 /*75*/ MatFDColoringApply_AIJ,
2540                                        MatSetFromOptions_MPIAIJ,
2541                                        0,
2542                                        0,
2543                                        MatFindZeroDiagonals_MPIAIJ,
2544                                 /*80*/ 0,
2545                                        0,
2546                                        0,
2547                                 /*83*/ MatLoad_MPIAIJ,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                        0,
2553                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2554                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2555                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2556                                        MatPtAP_MPIAIJ_MPIAIJ,
2557                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2558                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2559                                        0,
2560                                        0,
2561                                        0,
2562                                        0,
2563                                 /*99*/ 0,
2564                                        0,
2565                                        0,
2566                                        MatConjugate_MPIAIJ,
2567                                        0,
2568                                 /*104*/MatSetValuesRow_MPIAIJ,
2569                                        MatRealPart_MPIAIJ,
2570                                        MatImaginaryPart_MPIAIJ,
2571                                        0,
2572                                        0,
2573                                 /*109*/0,
2574                                        0,
2575                                        MatGetRowMin_MPIAIJ,
2576                                        0,
2577                                        MatMissingDiagonal_MPIAIJ,
2578                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2579                                        0,
2580                                        MatGetGhosts_MPIAIJ,
2581                                        0,
2582                                        0,
2583                                 /*119*/0,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                        MatGetMultiProcBlock_MPIAIJ,
2588                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2589                                        MatGetColumnNorms_MPIAIJ,
2590                                        MatInvertBlockDiagonal_MPIAIJ,
2591                                        0,
2592                                        MatCreateSubMatricesMPI_MPIAIJ,
2593                                 /*129*/0,
2594                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2595                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2596                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2597                                        0,
2598                                 /*134*/0,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                 /*139*/MatSetBlockSizes_MPIAIJ,
2604                                        0,
2605                                        0,
2606                                        MatFDColoringSetUp_MPIXAIJ,
2607                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2608                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2609 };
2610 
2611 /* ----------------------------------------------------------------------------------------*/
2612 
2613 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2614 {
2615   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2616   PetscErrorCode ierr;
2617 
2618   PetscFunctionBegin;
2619   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2620   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2625 {
2626   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2627   PetscErrorCode ierr;
2628 
2629   PetscFunctionBegin;
2630   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2631   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2636 {
2637   Mat_MPIAIJ     *b;
2638   PetscErrorCode ierr;
2639 
2640   PetscFunctionBegin;
2641   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2642   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2643   b = (Mat_MPIAIJ*)B->data;
2644 
2645 #if defined(PETSC_USE_CTABLE)
2646   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2647 #else
2648   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2649 #endif
2650   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2651   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2652   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2653 
2654   /* Because the B will have been resized we simply destroy it and create a new one each time */
2655   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2656   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2657   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2658   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2659   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2660   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2661 
2662   if (!B->preallocated) {
2663     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2664     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2665     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2666     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2667     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2668   }
2669 
2670   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2671   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2672   B->preallocated  = PETSC_TRUE;
2673   B->was_assembled = PETSC_FALSE;
2674   B->assembled     = PETSC_FALSE;;
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2679 {
2680   Mat            mat;
2681   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2682   PetscErrorCode ierr;
2683 
2684   PetscFunctionBegin;
2685   *newmat = 0;
2686   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2687   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2688   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2689   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2690   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2691   a       = (Mat_MPIAIJ*)mat->data;
2692 
2693   mat->factortype   = matin->factortype;
2694   mat->assembled    = PETSC_TRUE;
2695   mat->insertmode   = NOT_SET_VALUES;
2696   mat->preallocated = PETSC_TRUE;
2697 
2698   a->size         = oldmat->size;
2699   a->rank         = oldmat->rank;
2700   a->donotstash   = oldmat->donotstash;
2701   a->roworiented  = oldmat->roworiented;
2702   a->rowindices   = 0;
2703   a->rowvalues    = 0;
2704   a->getrowactive = PETSC_FALSE;
2705 
2706   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2707   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2708 
2709   if (oldmat->colmap) {
2710 #if defined(PETSC_USE_CTABLE)
2711     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2712 #else
2713     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2714     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2715     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2716 #endif
2717   } else a->colmap = 0;
2718   if (oldmat->garray) {
2719     PetscInt len;
2720     len  = oldmat->B->cmap->n;
2721     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2722     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2723     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2724   } else a->garray = 0;
2725 
2726   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2727   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2728   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2729   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2730   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2731   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2732   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2733   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2734   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2735   *newmat = mat;
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2740 {
2741   PetscScalar    *vals,*svals;
2742   MPI_Comm       comm;
2743   PetscErrorCode ierr;
2744   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2745   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2746   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2747   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2748   PetscInt       cend,cstart,n,*rowners;
2749   int            fd;
2750   PetscInt       bs = newMat->rmap->bs;
2751 
2752   PetscFunctionBegin;
2753   /* force binary viewer to load .info file if it has not yet done so */
2754   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2755   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2756   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2757   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2758   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2759   if (!rank) {
2760     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2761     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2762     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2763   }
2764 
2765   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2766   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2767   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2768   if (bs < 0) bs = 1;
2769 
2770   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2771   M    = header[1]; N = header[2];
2772 
2773   /* If global sizes are set, check if they are consistent with that given in the file */
2774   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2775   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2776 
2777   /* determine ownership of all (block) rows */
2778   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2779   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2780   else m = newMat->rmap->n; /* Set by user */
2781 
2782   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2783   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2784 
2785   /* First process needs enough room for process with most rows */
2786   if (!rank) {
2787     mmax = rowners[1];
2788     for (i=2; i<=size; i++) {
2789       mmax = PetscMax(mmax, rowners[i]);
2790     }
2791   } else mmax = -1;             /* unused, but compilers complain */
2792 
2793   rowners[0] = 0;
2794   for (i=2; i<=size; i++) {
2795     rowners[i] += rowners[i-1];
2796   }
2797   rstart = rowners[rank];
2798   rend   = rowners[rank+1];
2799 
2800   /* distribute row lengths to all processors */
2801   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2802   if (!rank) {
2803     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2804     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2805     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2806     for (j=0; j<m; j++) {
2807       procsnz[0] += ourlens[j];
2808     }
2809     for (i=1; i<size; i++) {
2810       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2811       /* calculate the number of nonzeros on each processor */
2812       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2813         procsnz[i] += rowlengths[j];
2814       }
2815       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2816     }
2817     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2818   } else {
2819     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2820   }
2821 
2822   if (!rank) {
2823     /* determine max buffer needed and allocate it */
2824     maxnz = 0;
2825     for (i=0; i<size; i++) {
2826       maxnz = PetscMax(maxnz,procsnz[i]);
2827     }
2828     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2829 
2830     /* read in my part of the matrix column indices  */
2831     nz   = procsnz[0];
2832     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2833     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2834 
2835     /* read in every one elses and ship off */
2836     for (i=1; i<size; i++) {
2837       nz   = procsnz[i];
2838       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2839       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2840     }
2841     ierr = PetscFree(cols);CHKERRQ(ierr);
2842   } else {
2843     /* determine buffer space needed for message */
2844     nz = 0;
2845     for (i=0; i<m; i++) {
2846       nz += ourlens[i];
2847     }
2848     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2849 
2850     /* receive message of column indices*/
2851     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2852   }
2853 
2854   /* determine column ownership if matrix is not square */
2855   if (N != M) {
2856     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2857     else n = newMat->cmap->n;
2858     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2859     cstart = cend - n;
2860   } else {
2861     cstart = rstart;
2862     cend   = rend;
2863     n      = cend - cstart;
2864   }
2865 
2866   /* loop over local rows, determining number of off diagonal entries */
2867   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2868   jj   = 0;
2869   for (i=0; i<m; i++) {
2870     for (j=0; j<ourlens[i]; j++) {
2871       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2872       jj++;
2873     }
2874   }
2875 
2876   for (i=0; i<m; i++) {
2877     ourlens[i] -= offlens[i];
2878   }
2879   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2880 
2881   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2882 
2883   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2884 
2885   for (i=0; i<m; i++) {
2886     ourlens[i] += offlens[i];
2887   }
2888 
2889   if (!rank) {
2890     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2891 
2892     /* read in my part of the matrix numerical values  */
2893     nz   = procsnz[0];
2894     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2895 
2896     /* insert into matrix */
2897     jj      = rstart;
2898     smycols = mycols;
2899     svals   = vals;
2900     for (i=0; i<m; i++) {
2901       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2902       smycols += ourlens[i];
2903       svals   += ourlens[i];
2904       jj++;
2905     }
2906 
2907     /* read in other processors and ship out */
2908     for (i=1; i<size; i++) {
2909       nz   = procsnz[i];
2910       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2911       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2912     }
2913     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2914   } else {
2915     /* receive numeric values */
2916     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2917 
2918     /* receive message of values*/
2919     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2920 
2921     /* insert into matrix */
2922     jj      = rstart;
2923     smycols = mycols;
2924     svals   = vals;
2925     for (i=0; i<m; i++) {
2926       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2927       smycols += ourlens[i];
2928       svals   += ourlens[i];
2929       jj++;
2930     }
2931   }
2932   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2933   ierr = PetscFree(vals);CHKERRQ(ierr);
2934   ierr = PetscFree(mycols);CHKERRQ(ierr);
2935   ierr = PetscFree(rowners);CHKERRQ(ierr);
2936   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2937   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2938   PetscFunctionReturn(0);
2939 }
2940 
2941 /* Not scalable because of ISAllGather() unless getting all columns. */
2942 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2943 {
2944   PetscErrorCode ierr;
2945   IS             iscol_local;
2946   PetscBool      isstride;
2947   PetscMPIInt    lisstride=0,gisstride;
2948 
2949   PetscFunctionBegin;
2950   /* check if we are grabbing all columns*/
2951   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2952 
2953   if (isstride) {
2954     PetscInt  start,len,mstart,mlen;
2955     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2956     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2957     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2958     if (mstart == start && mlen-mstart == len) lisstride = 1;
2959   }
2960 
2961   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2962   if (gisstride) {
2963     PetscInt N;
2964     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2965     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2966     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2967     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2968   } else {
2969     PetscInt cbs;
2970     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2971     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2972     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2973   }
2974 
2975   *isseq = iscol_local;
2976   PetscFunctionReturn(0);
2977 }
2978 
2979 /*
2980  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
2981  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
2982 
2983  Input Parameters:
2984    mat - matrix
2985    isrow - parallel row index set; its local indices are a subset of local columns of mat,
2986            i.e., mat->rstart <= isrow[i] < mat->rend
2987    iscol - parallel column index set; its local indices are a subset of local columns of mat,
2988            i.e., mat->cstart <= iscol[i] < mat->cend
2989  Output Parameter:
2990    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
2991    iscol_o - sequential column index set for retrieving mat->B
2992    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
2993  */
2994 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
2995 {
2996   PetscErrorCode ierr;
2997   Vec            x,cmap;
2998   const PetscInt *is_idx;
2999   PetscScalar    *xarray,*cmaparray;
3000   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3001   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3002   Mat            B=a->B;
3003   Vec            lvec=a->lvec,lcmap;
3004   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3005   MPI_Comm       comm;
3006 
3007   PetscFunctionBegin;
3008   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3009   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3010 
3011   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3012   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3013   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3014   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3015 
3016   /* Get start indices */
3017   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3018   isstart -= ncols;
3019   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3020 
3021   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3022   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3023   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3024   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3025   for (i=0; i<ncols; i++) {
3026     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3027     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3028     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3029   }
3030   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3031   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3032   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3033 
3034   /* Get iscol_d */
3035   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3036   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3037   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3038 
3039   /* Get isrow_d */
3040   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3041   rstart = mat->rmap->rstart;
3042   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3043   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3044   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3045   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3046 
3047   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3048   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3049   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3050 
3051   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3052   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3053 
3054   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3055 
3056   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3057   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3058   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3059 
3060   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3061   /* off-process column indices */
3062   count = 0;
3063   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3064   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3065 
3066   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3067   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3068   for (i=0; i<Bn; i++) {
3069     if (PetscRealPart(xarray[i]) > -1.0) {
3070       idx[count]     = i;                   /* local column index in off-diagonal part B */
3071       cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3072     }
3073   }
3074   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3075   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3076 
3077   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3078   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3079   ierr = ISSetBlockSize(*iscol_o,i);CHKERRQ(ierr);
3080   ierr = PetscFree(idx);CHKERRQ(ierr);
3081 
3082   *garray = cmap1;
3083 
3084   ierr = VecDestroy(&x);CHKERRQ(ierr);
3085   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3086   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3087   PetscFunctionReturn(0);
3088 }
3089 
3090 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3091 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3092 {
3093   PetscErrorCode ierr;
3094   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3095   Mat            M = NULL;
3096   MPI_Comm       comm;
3097   IS             iscol_d,isrow_d,iscol_o;
3098   Mat            Asub = NULL,Bsub = NULL;
3099   PetscInt       n;
3100 
3101   PetscFunctionBegin;
3102   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3103 
3104   if (call == MAT_REUSE_MATRIX) {
3105     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3106     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3107     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3108 
3109     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3110     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3111 
3112     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3113     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3114 
3115     /* Update diagonal and off-diagonal portions of submat */
3116     asub = (Mat_MPIAIJ*)(*submat)->data;
3117     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3118     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3119     if (n) {
3120       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3121     }
3122     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3123     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3124 
3125   } else { /* call == MAT_INITIAL_MATRIX) */
3126     const PetscInt *garray;
3127     PetscInt        BsubN;
3128 
3129     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3130     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3131 
3132     /* Create local submatrices Asub and Bsub */
3133     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3134     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3135 
3136     /* Create submatrix M */
3137     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3138 
3139     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3140     asub = (Mat_MPIAIJ*)M->data;
3141 
3142     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3143     n = asub->B->cmap->N;
3144     if (BsubN > n) {
3145       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3146       const PetscInt *idx;
3147       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3148       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3149 
3150       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3151       j = 0;
3152       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3153       for (i=0; i<n; i++) {
3154         if (j >= BsubN) break;
3155         while (subgarray[i] > garray[j]) j++;
3156 
3157         if (subgarray[i] == garray[j]) {
3158           idx_new[i] = idx[j++];
3159         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3160       }
3161       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3162 
3163       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3164       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3165 
3166     } else if (BsubN < n) {
3167       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3168     }
3169 
3170     ierr = PetscFree(garray);CHKERRQ(ierr);
3171     *submat = M;
3172 
3173     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3174     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3175     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3176 
3177     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3178     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3179 
3180     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3181     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3182   }
3183   PetscFunctionReturn(0);
3184 }
3185 
3186 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3187 {
3188   PetscErrorCode ierr;
3189   IS             iscol_local,isrow_d;
3190   PetscInt       csize;
3191   PetscInt       n,i,j,start,end;
3192   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3193   MPI_Comm       comm;
3194 
3195   PetscFunctionBegin;
3196   /* If isrow has same processor distribution as mat,
3197      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3198   if (call == MAT_REUSE_MATRIX) {
3199     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3200     if (isrow_d) {
3201       sameRowDist  = PETSC_TRUE;
3202       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3203     } else {
3204       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3205       if (iscol_local) {
3206         sameRowDist  = PETSC_TRUE;
3207         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3208       }
3209     }
3210   } else {
3211     /* Check if isrow has same processor distribution as mat */
3212     sameDist[0] = PETSC_FALSE;
3213     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3214     if (!n) {
3215       sameDist[0] = PETSC_TRUE;
3216     } else {
3217       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3218       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3219       if (i >= start && j < end) {
3220         sameDist[0] = PETSC_TRUE;
3221       }
3222     }
3223 
3224     /* Check if iscol has same processor distribution as mat */
3225     sameDist[1] = PETSC_FALSE;
3226     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3227     if (!n) {
3228       sameDist[1] = PETSC_TRUE;
3229     } else {
3230       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3231       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3232       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3233     }
3234 
3235     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3236     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3237     sameRowDist = tsameDist[0];
3238   }
3239 
3240   if (sameRowDist) {
3241     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3242       /* isrow and iscol have same processor distribution as mat */
3243       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3244     } else { /* sameRowDist */
3245       /* isrow has same processor distribution as mat */
3246       ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3247     }
3248     PetscFunctionReturn(0);
3249   }
3250 
3251   /* General case: iscol -> iscol_local which has global size of iscol */
3252   if (call == MAT_REUSE_MATRIX) {
3253     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3254     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3255   } else {
3256     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3257   }
3258 
3259   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3260   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3261 
3262   if (call == MAT_INITIAL_MATRIX) {
3263     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3264     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3265   }
3266   PetscFunctionReturn(0);
3267 }
3268 
3269 /*@C
3270      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3271          and "off-diagonal" part of the matrix in CSR format.
3272 
3273    Collective on MPI_Comm
3274 
3275    Input Parameters:
3276 +  comm - MPI communicator
3277 .  A - "diagonal" portion of matrix
3278 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3279 -  garray - global index of B columns
3280 
3281    Output Parameter:
3282 .   mat - the matrix, with input A as its local diagonal matrix
3283    Level: advanced
3284 
3285    Notes:
3286        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3287        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3288 
3289 .seealso: MatCreateMPIAIJWithSplitArrays()
3290 @*/
3291 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3292 {
3293   PetscErrorCode ierr;
3294   Mat_MPIAIJ     *maij;
3295   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3296   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3297   PetscScalar    *oa=b->a;
3298   Mat            Bnew;
3299   PetscInt       m,n,N;
3300 
3301   PetscFunctionBegin;
3302   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3303   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3304   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3305   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3306   if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs);
3307 
3308   /* Get global columns of mat */
3309   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3310 
3311   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3312   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3313   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3314   maij = (Mat_MPIAIJ*)(*mat)->data;
3315 
3316   (*mat)->preallocated = PETSC_TRUE;
3317 
3318   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3319   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3320 
3321   /* Set A as diagonal portion of *mat */
3322   maij->A = A;
3323 
3324   nz = oi[m];
3325   for (i=0; i<nz; i++) {
3326     col   = oj[i];
3327     oj[i] = garray[col];
3328   }
3329 
3330    /* Set Bnew as off-diagonal portion of *mat */
3331   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3332   bnew        = (Mat_SeqAIJ*)Bnew->data;
3333   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3334   maij->B     = Bnew;
3335 
3336   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3337 
3338   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3339   b->free_a       = PETSC_FALSE;
3340   b->free_ij      = PETSC_FALSE;
3341   ierr = MatDestroy(&B);CHKERRQ(ierr);
3342 
3343   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3344   bnew->free_a       = PETSC_TRUE;
3345   bnew->free_ij      = PETSC_TRUE;
3346 
3347   /* condense columns of maij->B */
3348   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3349   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3350   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3351   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3352   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3357 
3358 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3359 {
3360   PetscErrorCode ierr;
3361   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3362   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3363   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3364   Mat            M,Msub,B=a->B;
3365   MatScalar      *aa;
3366   Mat_SeqAIJ     *aij;
3367   PetscInt       *garray = a->garray,*colsub,Ncols;
3368   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3369   IS             iscol_sub,iscmap;
3370   const PetscInt *is_idx,*cmap;
3371   PetscBool      allcolumns=PETSC_FALSE;
3372   IS             iscol_local=NULL;
3373   MPI_Comm       comm;
3374 
3375   PetscFunctionBegin;
3376   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3377 
3378   if (call == MAT_REUSE_MATRIX) {
3379     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3380     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3381     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3382 
3383     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3384     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3385 
3386     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3387     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3388 
3389     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3390 
3391   } else { /* call == MAT_INITIAL_MATRIX) */
3392     PetscBool flg;
3393 
3394     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3395     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3396 
3397     /* (1) iscol -> nonscalable iscol_local */
3398     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3399     ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3400     if (n != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != Ncols %d",n,Ncols);
3401 
3402     /* Check for special case: each processor gets entire matrix columns */
3403     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3404     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3405     if (allcolumns) {
3406       iscol_sub = iscol_local;
3407       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3408       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3409 
3410     } else {
3411       /* (2) iscol_local -> iscol_sub and iscmap */
3412       PetscInt *idx,*cmap1,k,cbs;
3413 
3414       /* implementation below requires iscol_local be sorted, it can have duplicate indices */
3415       ierr = ISSorted(iscol_local,&flg);CHKERRQ(ierr);
3416       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsorted iscol_local is not implemented yet");
3417 
3418       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3419       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3420       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3421       count = 0;
3422       k     = 0;
3423       for (i=0; i<Ncols; i++) {
3424         j = is_idx[i];
3425         if (j >= cstart && j < cend) {
3426           /* diagonal part of mat */
3427           idx[count]     = j;
3428           cmap1[count++] = i; /* column index in submat */
3429         } else if (Bn) {
3430           /* off-diagonal part of mat */
3431           if (j == garray[k]) {
3432             idx[count]     = j;
3433             cmap1[count++] = i;  /* column index in submat */
3434           } else if (j > garray[k]) {
3435             while (j > garray[k] && k < Bn-1) k++;
3436             if (j == garray[k]) {
3437               idx[count]     = j;
3438               cmap1[count++] = i; /* column index in submat */
3439             }
3440           }
3441         }
3442       }
3443       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3444 
3445       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3446       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3447       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3448 
3449       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3450     }
3451 
3452     /* (3) Create sequential Msub */
3453     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3454   }
3455 
3456   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3457   aij  = (Mat_SeqAIJ*)(Msub)->data;
3458   ii   = aij->i;
3459   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3460 
3461   /*
3462       m - number of local rows
3463       Ncols - number of columns (same on all processors)
3464       rstart - first row in new global matrix generated
3465   */
3466   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3467 
3468   if (call == MAT_INITIAL_MATRIX) {
3469     /* (4) Create parallel newmat */
3470     PetscMPIInt    rank,size;
3471     PetscInt       csize;
3472 
3473     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3474     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3475 
3476     /*
3477         Determine the number of non-zeros in the diagonal and off-diagonal
3478         portions of the matrix in order to do correct preallocation
3479     */
3480 
3481     /* first get start and end of "diagonal" columns */
3482     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3483     if (csize == PETSC_DECIDE) {
3484       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3485       if (mglobal == Ncols) { /* square matrix */
3486         nlocal = m;
3487       } else {
3488         nlocal = Ncols/size + ((Ncols % size) > rank);
3489       }
3490     } else {
3491       nlocal = csize;
3492     }
3493     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3494     rstart = rend - nlocal;
3495     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3496 
3497     /* next, compute all the lengths */
3498     jj    = aij->j;
3499     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3500     olens = dlens + m;
3501     for (i=0; i<m; i++) {
3502       jend = ii[i+1] - ii[i];
3503       olen = 0;
3504       dlen = 0;
3505       for (j=0; j<jend; j++) {
3506         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3507         else dlen++;
3508         jj++;
3509       }
3510       olens[i] = olen;
3511       dlens[i] = dlen;
3512     }
3513     ierr = MatGetBlockSizes(Msub,&bs,&cbs);CHKERRQ(ierr);
3514 
3515     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3516     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3517     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3518     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3519     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3520     ierr = PetscFree(dlens);CHKERRQ(ierr);
3521 
3522   } else { /* call == MAT_REUSE_MATRIX */
3523     M    = *newmat;
3524     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3525     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3526     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3527     /*
3528          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3529        rather than the slower MatSetValues().
3530     */
3531     M->was_assembled = PETSC_TRUE;
3532     M->assembled     = PETSC_FALSE;
3533   }
3534 
3535   /* (5) Set values of Msub to *newmat */
3536   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3537   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3538 
3539   jj   = aij->j;
3540   aa   = aij->a;
3541   for (i=0; i<m; i++) {
3542     row = rstart + i;
3543     nz  = ii[i+1] - ii[i];
3544     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3545     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3546     jj += nz; aa += nz;
3547   }
3548   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3549 
3550   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3551   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3552 
3553   ierr = PetscFree(colsub);CHKERRQ(ierr);
3554 
3555   /* save Msub, iscol_sub and iscmap used in processor for next request */
3556   if (call ==  MAT_INITIAL_MATRIX) {
3557     *newmat = M;
3558     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3559     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3560 
3561     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3562     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3563 
3564     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3565     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3566 
3567     if (iscol_local) {
3568       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3569       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3570     }
3571   }
3572   PetscFunctionReturn(0);
3573 }
3574 
3575 /*
3576     Not great since it makes two copies of the submatrix, first an SeqAIJ
3577   in local and then by concatenating the local matrices the end result.
3578   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3579 
3580   Note: This requires a sequential iscol with all indices.
3581 */
3582 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3583 {
3584   PetscErrorCode ierr;
3585   PetscMPIInt    rank,size;
3586   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3587   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3588   Mat            M,Mreuse;
3589   MatScalar      *aa,*vwork;
3590   MPI_Comm       comm;
3591   Mat_SeqAIJ     *aij;
3592   PetscBool      colflag,allcolumns=PETSC_FALSE;
3593 
3594   PetscFunctionBegin;
3595   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3596   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3597   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3598 
3599   /* Check for special case: each processor gets entire matrix columns */
3600   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3601   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3602   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3603 
3604   if (call ==  MAT_REUSE_MATRIX) {
3605     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3606     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3607     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3608   } else {
3609     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3610   }
3611 
3612   /*
3613       m - number of local rows
3614       n - number of columns (same on all processors)
3615       rstart - first row in new global matrix generated
3616   */
3617   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3618   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3619   if (call == MAT_INITIAL_MATRIX) {
3620     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3621     ii  = aij->i;
3622     jj  = aij->j;
3623 
3624     /*
3625         Determine the number of non-zeros in the diagonal and off-diagonal
3626         portions of the matrix in order to do correct preallocation
3627     */
3628 
3629     /* first get start and end of "diagonal" columns */
3630     if (csize == PETSC_DECIDE) {
3631       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3632       if (mglobal == n) { /* square matrix */
3633         nlocal = m;
3634       } else {
3635         nlocal = n/size + ((n % size) > rank);
3636       }
3637     } else {
3638       nlocal = csize;
3639     }
3640     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3641     rstart = rend - nlocal;
3642     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3643 
3644     /* next, compute all the lengths */
3645     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3646     olens = dlens + m;
3647     for (i=0; i<m; i++) {
3648       jend = ii[i+1] - ii[i];
3649       olen = 0;
3650       dlen = 0;
3651       for (j=0; j<jend; j++) {
3652         if (*jj < rstart || *jj >= rend) olen++;
3653         else dlen++;
3654         jj++;
3655       }
3656       olens[i] = olen;
3657       dlens[i] = dlen;
3658     }
3659     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3660     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3661     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3662     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3663     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3664     ierr = PetscFree(dlens);CHKERRQ(ierr);
3665   } else {
3666     PetscInt ml,nl;
3667 
3668     M    = *newmat;
3669     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3670     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3671     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3672     /*
3673          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3674        rather than the slower MatSetValues().
3675     */
3676     M->was_assembled = PETSC_TRUE;
3677     M->assembled     = PETSC_FALSE;
3678   }
3679   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3680   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3681   ii   = aij->i;
3682   jj   = aij->j;
3683   aa   = aij->a;
3684   for (i=0; i<m; i++) {
3685     row   = rstart + i;
3686     nz    = ii[i+1] - ii[i];
3687     cwork = jj;     jj += nz;
3688     vwork = aa;     aa += nz;
3689     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3690   }
3691 
3692   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3693   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3694   *newmat = M;
3695 
3696   /* save submatrix used in processor for next request */
3697   if (call ==  MAT_INITIAL_MATRIX) {
3698     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3699     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3700   }
3701   PetscFunctionReturn(0);
3702 }
3703 
3704 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3705 {
3706   PetscInt       m,cstart, cend,j,nnz,i,d;
3707   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3708   const PetscInt *JJ;
3709   PetscScalar    *values;
3710   PetscErrorCode ierr;
3711   PetscBool      nooffprocentries;
3712 
3713   PetscFunctionBegin;
3714   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3715 
3716   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3717   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3718   m      = B->rmap->n;
3719   cstart = B->cmap->rstart;
3720   cend   = B->cmap->rend;
3721   rstart = B->rmap->rstart;
3722 
3723   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3724 
3725 #if defined(PETSC_USE_DEBUGGING)
3726   for (i=0; i<m; i++) {
3727     nnz = Ii[i+1]- Ii[i];
3728     JJ  = J + Ii[i];
3729     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3730     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3731     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3732   }
3733 #endif
3734 
3735   for (i=0; i<m; i++) {
3736     nnz     = Ii[i+1]- Ii[i];
3737     JJ      = J + Ii[i];
3738     nnz_max = PetscMax(nnz_max,nnz);
3739     d       = 0;
3740     for (j=0; j<nnz; j++) {
3741       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3742     }
3743     d_nnz[i] = d;
3744     o_nnz[i] = nnz - d;
3745   }
3746   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3747   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3748 
3749   if (v) values = (PetscScalar*)v;
3750   else {
3751     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3752   }
3753 
3754   for (i=0; i<m; i++) {
3755     ii   = i + rstart;
3756     nnz  = Ii[i+1]- Ii[i];
3757     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3758   }
3759   nooffprocentries    = B->nooffprocentries;
3760   B->nooffprocentries = PETSC_TRUE;
3761   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3762   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3763   B->nooffprocentries = nooffprocentries;
3764 
3765   if (!v) {
3766     ierr = PetscFree(values);CHKERRQ(ierr);
3767   }
3768   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3769   PetscFunctionReturn(0);
3770 }
3771 
3772 /*@
3773    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3774    (the default parallel PETSc format).
3775 
3776    Collective on MPI_Comm
3777 
3778    Input Parameters:
3779 +  B - the matrix
3780 .  i - the indices into j for the start of each local row (starts with zero)
3781 .  j - the column indices for each local row (starts with zero)
3782 -  v - optional values in the matrix
3783 
3784    Level: developer
3785 
3786    Notes:
3787        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3788      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3789      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3790 
3791        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3792 
3793        The format which is used for the sparse matrix input, is equivalent to a
3794     row-major ordering.. i.e for the following matrix, the input data expected is
3795     as shown
3796 
3797 $        1 0 0
3798 $        2 0 3     P0
3799 $       -------
3800 $        4 5 6     P1
3801 $
3802 $     Process0 [P0]: rows_owned=[0,1]
3803 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3804 $        j =  {0,0,2}  [size = 3]
3805 $        v =  {1,2,3}  [size = 3]
3806 $
3807 $     Process1 [P1]: rows_owned=[2]
3808 $        i =  {0,3}    [size = nrow+1  = 1+1]
3809 $        j =  {0,1,2}  [size = 3]
3810 $        v =  {4,5,6}  [size = 3]
3811 
3812 .keywords: matrix, aij, compressed row, sparse, parallel
3813 
3814 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3815           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3816 @*/
3817 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3818 {
3819   PetscErrorCode ierr;
3820 
3821   PetscFunctionBegin;
3822   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3823   PetscFunctionReturn(0);
3824 }
3825 
3826 /*@C
3827    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3828    (the default parallel PETSc format).  For good matrix assembly performance
3829    the user should preallocate the matrix storage by setting the parameters
3830    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3831    performance can be increased by more than a factor of 50.
3832 
3833    Collective on MPI_Comm
3834 
3835    Input Parameters:
3836 +  B - the matrix
3837 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3838            (same value is used for all local rows)
3839 .  d_nnz - array containing the number of nonzeros in the various rows of the
3840            DIAGONAL portion of the local submatrix (possibly different for each row)
3841            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3842            The size of this array is equal to the number of local rows, i.e 'm'.
3843            For matrices that will be factored, you must leave room for (and set)
3844            the diagonal entry even if it is zero.
3845 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3846            submatrix (same value is used for all local rows).
3847 -  o_nnz - array containing the number of nonzeros in the various rows of the
3848            OFF-DIAGONAL portion of the local submatrix (possibly different for
3849            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3850            structure. The size of this array is equal to the number
3851            of local rows, i.e 'm'.
3852 
3853    If the *_nnz parameter is given then the *_nz parameter is ignored
3854 
3855    The AIJ format (also called the Yale sparse matrix format or
3856    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3857    storage.  The stored row and column indices begin with zero.
3858    See Users-Manual: ch_mat for details.
3859 
3860    The parallel matrix is partitioned such that the first m0 rows belong to
3861    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3862    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3863 
3864    The DIAGONAL portion of the local submatrix of a processor can be defined
3865    as the submatrix which is obtained by extraction the part corresponding to
3866    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3867    first row that belongs to the processor, r2 is the last row belonging to
3868    the this processor, and c1-c2 is range of indices of the local part of a
3869    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3870    common case of a square matrix, the row and column ranges are the same and
3871    the DIAGONAL part is also square. The remaining portion of the local
3872    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3873 
3874    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3875 
3876    You can call MatGetInfo() to get information on how effective the preallocation was;
3877    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3878    You can also run with the option -info and look for messages with the string
3879    malloc in them to see if additional memory allocation was needed.
3880 
3881    Example usage:
3882 
3883    Consider the following 8x8 matrix with 34 non-zero values, that is
3884    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3885    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3886    as follows:
3887 
3888 .vb
3889             1  2  0  |  0  3  0  |  0  4
3890     Proc0   0  5  6  |  7  0  0  |  8  0
3891             9  0 10  | 11  0  0  | 12  0
3892     -------------------------------------
3893            13  0 14  | 15 16 17  |  0  0
3894     Proc1   0 18  0  | 19 20 21  |  0  0
3895             0  0  0  | 22 23  0  | 24  0
3896     -------------------------------------
3897     Proc2  25 26 27  |  0  0 28  | 29  0
3898            30  0  0  | 31 32 33  |  0 34
3899 .ve
3900 
3901    This can be represented as a collection of submatrices as:
3902 
3903 .vb
3904       A B C
3905       D E F
3906       G H I
3907 .ve
3908 
3909    Where the submatrices A,B,C are owned by proc0, D,E,F are
3910    owned by proc1, G,H,I are owned by proc2.
3911 
3912    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3913    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3914    The 'M','N' parameters are 8,8, and have the same values on all procs.
3915 
3916    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3917    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3918    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3919    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3920    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3921    matrix, ans [DF] as another SeqAIJ matrix.
3922 
3923    When d_nz, o_nz parameters are specified, d_nz storage elements are
3924    allocated for every row of the local diagonal submatrix, and o_nz
3925    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3926    One way to choose d_nz and o_nz is to use the max nonzerors per local
3927    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3928    In this case, the values of d_nz,o_nz are:
3929 .vb
3930      proc0 : dnz = 2, o_nz = 2
3931      proc1 : dnz = 3, o_nz = 2
3932      proc2 : dnz = 1, o_nz = 4
3933 .ve
3934    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3935    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3936    for proc3. i.e we are using 12+15+10=37 storage locations to store
3937    34 values.
3938 
3939    When d_nnz, o_nnz parameters are specified, the storage is specified
3940    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3941    In the above case the values for d_nnz,o_nnz are:
3942 .vb
3943      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3944      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3945      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3946 .ve
3947    Here the space allocated is sum of all the above values i.e 34, and
3948    hence pre-allocation is perfect.
3949 
3950    Level: intermediate
3951 
3952 .keywords: matrix, aij, compressed row, sparse, parallel
3953 
3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3955           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3956 @*/
3957 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3958 {
3959   PetscErrorCode ierr;
3960 
3961   PetscFunctionBegin;
3962   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3963   PetscValidType(B,1);
3964   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 /*@
3969      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3970          CSR format the local rows.
3971 
3972    Collective on MPI_Comm
3973 
3974    Input Parameters:
3975 +  comm - MPI communicator
3976 .  m - number of local rows (Cannot be PETSC_DECIDE)
3977 .  n - This value should be the same as the local size used in creating the
3978        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3979        calculated if N is given) For square matrices n is almost always m.
3980 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3981 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3982 .   i - row indices
3983 .   j - column indices
3984 -   a - matrix values
3985 
3986    Output Parameter:
3987 .   mat - the matrix
3988 
3989    Level: intermediate
3990 
3991    Notes:
3992        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3993      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3994      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3995 
3996        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3997 
3998        The format which is used for the sparse matrix input, is equivalent to a
3999     row-major ordering.. i.e for the following matrix, the input data expected is
4000     as shown
4001 
4002 $        1 0 0
4003 $        2 0 3     P0
4004 $       -------
4005 $        4 5 6     P1
4006 $
4007 $     Process0 [P0]: rows_owned=[0,1]
4008 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4009 $        j =  {0,0,2}  [size = 3]
4010 $        v =  {1,2,3}  [size = 3]
4011 $
4012 $     Process1 [P1]: rows_owned=[2]
4013 $        i =  {0,3}    [size = nrow+1  = 1+1]
4014 $        j =  {0,1,2}  [size = 3]
4015 $        v =  {4,5,6}  [size = 3]
4016 
4017 .keywords: matrix, aij, compressed row, sparse, parallel
4018 
4019 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4020           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4021 @*/
4022 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4023 {
4024   PetscErrorCode ierr;
4025 
4026   PetscFunctionBegin;
4027   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4028   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4029   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4030   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4031   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4032   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4033   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4034   PetscFunctionReturn(0);
4035 }
4036 
4037 /*@C
4038    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4039    (the default parallel PETSc format).  For good matrix assembly performance
4040    the user should preallocate the matrix storage by setting the parameters
4041    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4042    performance can be increased by more than a factor of 50.
4043 
4044    Collective on MPI_Comm
4045 
4046    Input Parameters:
4047 +  comm - MPI communicator
4048 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4049            This value should be the same as the local size used in creating the
4050            y vector for the matrix-vector product y = Ax.
4051 .  n - This value should be the same as the local size used in creating the
4052        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4053        calculated if N is given) For square matrices n is almost always m.
4054 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4055 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4056 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4057            (same value is used for all local rows)
4058 .  d_nnz - array containing the number of nonzeros in the various rows of the
4059            DIAGONAL portion of the local submatrix (possibly different for each row)
4060            or NULL, if d_nz is used to specify the nonzero structure.
4061            The size of this array is equal to the number of local rows, i.e 'm'.
4062 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4063            submatrix (same value is used for all local rows).
4064 -  o_nnz - array containing the number of nonzeros in the various rows of the
4065            OFF-DIAGONAL portion of the local submatrix (possibly different for
4066            each row) or NULL, if o_nz is used to specify the nonzero
4067            structure. The size of this array is equal to the number
4068            of local rows, i.e 'm'.
4069 
4070    Output Parameter:
4071 .  A - the matrix
4072 
4073    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4074    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4075    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4076 
4077    Notes:
4078    If the *_nnz parameter is given then the *_nz parameter is ignored
4079 
4080    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4081    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4082    storage requirements for this matrix.
4083 
4084    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4085    processor than it must be used on all processors that share the object for
4086    that argument.
4087 
4088    The user MUST specify either the local or global matrix dimensions
4089    (possibly both).
4090 
4091    The parallel matrix is partitioned across processors such that the
4092    first m0 rows belong to process 0, the next m1 rows belong to
4093    process 1, the next m2 rows belong to process 2 etc.. where
4094    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4095    values corresponding to [m x N] submatrix.
4096 
4097    The columns are logically partitioned with the n0 columns belonging
4098    to 0th partition, the next n1 columns belonging to the next
4099    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4100 
4101    The DIAGONAL portion of the local submatrix on any given processor
4102    is the submatrix corresponding to the rows and columns m,n
4103    corresponding to the given processor. i.e diagonal matrix on
4104    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4105    etc. The remaining portion of the local submatrix [m x (N-n)]
4106    constitute the OFF-DIAGONAL portion. The example below better
4107    illustrates this concept.
4108 
4109    For a square global matrix we define each processor's diagonal portion
4110    to be its local rows and the corresponding columns (a square submatrix);
4111    each processor's off-diagonal portion encompasses the remainder of the
4112    local matrix (a rectangular submatrix).
4113 
4114    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4115 
4116    When calling this routine with a single process communicator, a matrix of
4117    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
4118    type of communicator, use the construction mechanism:
4119      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4120 
4121    By default, this format uses inodes (identical nodes) when possible.
4122    We search for consecutive rows with the same nonzero structure, thereby
4123    reusing matrix information to achieve increased efficiency.
4124 
4125    Options Database Keys:
4126 +  -mat_no_inode  - Do not use inodes
4127 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4128 -  -mat_aij_oneindex - Internally use indexing starting at 1
4129         rather than 0.  Note that when calling MatSetValues(),
4130         the user still MUST index entries starting at 0!
4131 
4132 
4133    Example usage:
4134 
4135    Consider the following 8x8 matrix with 34 non-zero values, that is
4136    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4137    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4138    as follows:
4139 
4140 .vb
4141             1  2  0  |  0  3  0  |  0  4
4142     Proc0   0  5  6  |  7  0  0  |  8  0
4143             9  0 10  | 11  0  0  | 12  0
4144     -------------------------------------
4145            13  0 14  | 15 16 17  |  0  0
4146     Proc1   0 18  0  | 19 20 21  |  0  0
4147             0  0  0  | 22 23  0  | 24  0
4148     -------------------------------------
4149     Proc2  25 26 27  |  0  0 28  | 29  0
4150            30  0  0  | 31 32 33  |  0 34
4151 .ve
4152 
4153    This can be represented as a collection of submatrices as:
4154 
4155 .vb
4156       A B C
4157       D E F
4158       G H I
4159 .ve
4160 
4161    Where the submatrices A,B,C are owned by proc0, D,E,F are
4162    owned by proc1, G,H,I are owned by proc2.
4163 
4164    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4165    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4166    The 'M','N' parameters are 8,8, and have the same values on all procs.
4167 
4168    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4169    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4170    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4171    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4172    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4173    matrix, ans [DF] as another SeqAIJ matrix.
4174 
4175    When d_nz, o_nz parameters are specified, d_nz storage elements are
4176    allocated for every row of the local diagonal submatrix, and o_nz
4177    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4178    One way to choose d_nz and o_nz is to use the max nonzerors per local
4179    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4180    In this case, the values of d_nz,o_nz are:
4181 .vb
4182      proc0 : dnz = 2, o_nz = 2
4183      proc1 : dnz = 3, o_nz = 2
4184      proc2 : dnz = 1, o_nz = 4
4185 .ve
4186    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4187    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4188    for proc3. i.e we are using 12+15+10=37 storage locations to store
4189    34 values.
4190 
4191    When d_nnz, o_nnz parameters are specified, the storage is specified
4192    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4193    In the above case the values for d_nnz,o_nnz are:
4194 .vb
4195      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4196      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4197      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4198 .ve
4199    Here the space allocated is sum of all the above values i.e 34, and
4200    hence pre-allocation is perfect.
4201 
4202    Level: intermediate
4203 
4204 .keywords: matrix, aij, compressed row, sparse, parallel
4205 
4206 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4207           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4208 @*/
4209 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4210 {
4211   PetscErrorCode ierr;
4212   PetscMPIInt    size;
4213 
4214   PetscFunctionBegin;
4215   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4216   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4217   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4218   if (size > 1) {
4219     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4220     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4221   } else {
4222     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4223     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4224   }
4225   PetscFunctionReturn(0);
4226 }
4227 
4228 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4229 {
4230   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4231   PetscBool      flg;
4232   PetscErrorCode ierr;
4233 
4234   PetscFunctionBegin;
4235   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4236   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4237   if (Ad)     *Ad     = a->A;
4238   if (Ao)     *Ao     = a->B;
4239   if (colmap) *colmap = a->garray;
4240   PetscFunctionReturn(0);
4241 }
4242 
4243 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4244 {
4245   PetscErrorCode ierr;
4246   PetscInt       m,N,i,rstart,nnz,Ii;
4247   PetscInt       *indx;
4248   PetscScalar    *values;
4249 
4250   PetscFunctionBegin;
4251   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4252   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4253     PetscInt       *dnz,*onz,sum,bs,cbs;
4254 
4255     if (n == PETSC_DECIDE) {
4256       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4257     }
4258     /* Check sum(n) = N */
4259     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4260     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4261 
4262     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4263     rstart -= m;
4264 
4265     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4266     for (i=0; i<m; i++) {
4267       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4268       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4269       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4270     }
4271 
4272     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4273     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4274     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4275     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4276     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4277     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4278     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4279     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4280   }
4281 
4282   /* numeric phase */
4283   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4284   for (i=0; i<m; i++) {
4285     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4286     Ii   = i + rstart;
4287     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4288     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4289   }
4290   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4291   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4292   PetscFunctionReturn(0);
4293 }
4294 
4295 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4296 {
4297   PetscErrorCode    ierr;
4298   PetscMPIInt       rank;
4299   PetscInt          m,N,i,rstart,nnz;
4300   size_t            len;
4301   const PetscInt    *indx;
4302   PetscViewer       out;
4303   char              *name;
4304   Mat               B;
4305   const PetscScalar *values;
4306 
4307   PetscFunctionBegin;
4308   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4309   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4310   /* Should this be the type of the diagonal block of A? */
4311   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4312   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4313   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4314   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4315   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4316   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4317   for (i=0; i<m; i++) {
4318     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4319     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4320     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4321   }
4322   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4323   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4324 
4325   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4326   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4327   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4328   sprintf(name,"%s.%d",outfile,rank);
4329   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4330   ierr = PetscFree(name);CHKERRQ(ierr);
4331   ierr = MatView(B,out);CHKERRQ(ierr);
4332   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4333   ierr = MatDestroy(&B);CHKERRQ(ierr);
4334   PetscFunctionReturn(0);
4335 }
4336 
4337 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4338 {
4339   PetscErrorCode      ierr;
4340   Mat_Merge_SeqsToMPI *merge;
4341   PetscContainer      container;
4342 
4343   PetscFunctionBegin;
4344   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4345   if (container) {
4346     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4347     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4348     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4349     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4350     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4351     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4352     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4353     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4354     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4355     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4356     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4357     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4358     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4359     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4360     ierr = PetscFree(merge);CHKERRQ(ierr);
4361     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4362   }
4363   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4364   PetscFunctionReturn(0);
4365 }
4366 
4367 #include <../src/mat/utils/freespace.h>
4368 #include <petscbt.h>
4369 
4370 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4371 {
4372   PetscErrorCode      ierr;
4373   MPI_Comm            comm;
4374   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4375   PetscMPIInt         size,rank,taga,*len_s;
4376   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4377   PetscInt            proc,m;
4378   PetscInt            **buf_ri,**buf_rj;
4379   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4380   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4381   MPI_Request         *s_waits,*r_waits;
4382   MPI_Status          *status;
4383   MatScalar           *aa=a->a;
4384   MatScalar           **abuf_r,*ba_i;
4385   Mat_Merge_SeqsToMPI *merge;
4386   PetscContainer      container;
4387 
4388   PetscFunctionBegin;
4389   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4390   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4391 
4392   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4393   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4394 
4395   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4396   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4397 
4398   bi     = merge->bi;
4399   bj     = merge->bj;
4400   buf_ri = merge->buf_ri;
4401   buf_rj = merge->buf_rj;
4402 
4403   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4404   owners = merge->rowmap->range;
4405   len_s  = merge->len_s;
4406 
4407   /* send and recv matrix values */
4408   /*-----------------------------*/
4409   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4410   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4411 
4412   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4413   for (proc=0,k=0; proc<size; proc++) {
4414     if (!len_s[proc]) continue;
4415     i    = owners[proc];
4416     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4417     k++;
4418   }
4419 
4420   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4421   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4422   ierr = PetscFree(status);CHKERRQ(ierr);
4423 
4424   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4425   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4426 
4427   /* insert mat values of mpimat */
4428   /*----------------------------*/
4429   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4430   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4431 
4432   for (k=0; k<merge->nrecv; k++) {
4433     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4434     nrows       = *(buf_ri_k[k]);
4435     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4436     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4437   }
4438 
4439   /* set values of ba */
4440   m = merge->rowmap->n;
4441   for (i=0; i<m; i++) {
4442     arow = owners[rank] + i;
4443     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4444     bnzi = bi[i+1] - bi[i];
4445     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4446 
4447     /* add local non-zero vals of this proc's seqmat into ba */
4448     anzi   = ai[arow+1] - ai[arow];
4449     aj     = a->j + ai[arow];
4450     aa     = a->a + ai[arow];
4451     nextaj = 0;
4452     for (j=0; nextaj<anzi; j++) {
4453       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4454         ba_i[j] += aa[nextaj++];
4455       }
4456     }
4457 
4458     /* add received vals into ba */
4459     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4460       /* i-th row */
4461       if (i == *nextrow[k]) {
4462         anzi   = *(nextai[k]+1) - *nextai[k];
4463         aj     = buf_rj[k] + *(nextai[k]);
4464         aa     = abuf_r[k] + *(nextai[k]);
4465         nextaj = 0;
4466         for (j=0; nextaj<anzi; j++) {
4467           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4468             ba_i[j] += aa[nextaj++];
4469           }
4470         }
4471         nextrow[k]++; nextai[k]++;
4472       }
4473     }
4474     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4475   }
4476   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4477   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4478 
4479   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4480   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4481   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4482   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4483   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4484   PetscFunctionReturn(0);
4485 }
4486 
4487 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4488 {
4489   PetscErrorCode      ierr;
4490   Mat                 B_mpi;
4491   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4492   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4493   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4494   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4495   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4496   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4497   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4498   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4499   MPI_Status          *status;
4500   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4501   PetscBT             lnkbt;
4502   Mat_Merge_SeqsToMPI *merge;
4503   PetscContainer      container;
4504 
4505   PetscFunctionBegin;
4506   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4507 
4508   /* make sure it is a PETSc comm */
4509   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4510   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4511   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4512 
4513   ierr = PetscNew(&merge);CHKERRQ(ierr);
4514   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4515 
4516   /* determine row ownership */
4517   /*---------------------------------------------------------*/
4518   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4519   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4520   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4521   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4522   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4523   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4524   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4525 
4526   m      = merge->rowmap->n;
4527   owners = merge->rowmap->range;
4528 
4529   /* determine the number of messages to send, their lengths */
4530   /*---------------------------------------------------------*/
4531   len_s = merge->len_s;
4532 
4533   len          = 0; /* length of buf_si[] */
4534   merge->nsend = 0;
4535   for (proc=0; proc<size; proc++) {
4536     len_si[proc] = 0;
4537     if (proc == rank) {
4538       len_s[proc] = 0;
4539     } else {
4540       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4541       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4542     }
4543     if (len_s[proc]) {
4544       merge->nsend++;
4545       nrows = 0;
4546       for (i=owners[proc]; i<owners[proc+1]; i++) {
4547         if (ai[i+1] > ai[i]) nrows++;
4548       }
4549       len_si[proc] = 2*(nrows+1);
4550       len         += len_si[proc];
4551     }
4552   }
4553 
4554   /* determine the number and length of messages to receive for ij-structure */
4555   /*-------------------------------------------------------------------------*/
4556   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4557   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4558 
4559   /* post the Irecv of j-structure */
4560   /*-------------------------------*/
4561   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4562   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4563 
4564   /* post the Isend of j-structure */
4565   /*--------------------------------*/
4566   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4567 
4568   for (proc=0, k=0; proc<size; proc++) {
4569     if (!len_s[proc]) continue;
4570     i    = owners[proc];
4571     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4572     k++;
4573   }
4574 
4575   /* receives and sends of j-structure are complete */
4576   /*------------------------------------------------*/
4577   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4578   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4579 
4580   /* send and recv i-structure */
4581   /*---------------------------*/
4582   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4583   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4584 
4585   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4586   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4587   for (proc=0,k=0; proc<size; proc++) {
4588     if (!len_s[proc]) continue;
4589     /* form outgoing message for i-structure:
4590          buf_si[0]:                 nrows to be sent
4591                [1:nrows]:           row index (global)
4592                [nrows+1:2*nrows+1]: i-structure index
4593     */
4594     /*-------------------------------------------*/
4595     nrows       = len_si[proc]/2 - 1;
4596     buf_si_i    = buf_si + nrows+1;
4597     buf_si[0]   = nrows;
4598     buf_si_i[0] = 0;
4599     nrows       = 0;
4600     for (i=owners[proc]; i<owners[proc+1]; i++) {
4601       anzi = ai[i+1] - ai[i];
4602       if (anzi) {
4603         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4604         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4605         nrows++;
4606       }
4607     }
4608     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4609     k++;
4610     buf_si += len_si[proc];
4611   }
4612 
4613   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4614   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4615 
4616   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4617   for (i=0; i<merge->nrecv; i++) {
4618     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4619   }
4620 
4621   ierr = PetscFree(len_si);CHKERRQ(ierr);
4622   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4623   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4624   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4625   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4626   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4627   ierr = PetscFree(status);CHKERRQ(ierr);
4628 
4629   /* compute a local seq matrix in each processor */
4630   /*----------------------------------------------*/
4631   /* allocate bi array and free space for accumulating nonzero column info */
4632   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4633   bi[0] = 0;
4634 
4635   /* create and initialize a linked list */
4636   nlnk = N+1;
4637   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4638 
4639   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4640   len  = ai[owners[rank+1]] - ai[owners[rank]];
4641   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4642 
4643   current_space = free_space;
4644 
4645   /* determine symbolic info for each local row */
4646   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4647 
4648   for (k=0; k<merge->nrecv; k++) {
4649     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4650     nrows       = *buf_ri_k[k];
4651     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4652     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4653   }
4654 
4655   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4656   len  = 0;
4657   for (i=0; i<m; i++) {
4658     bnzi = 0;
4659     /* add local non-zero cols of this proc's seqmat into lnk */
4660     arow  = owners[rank] + i;
4661     anzi  = ai[arow+1] - ai[arow];
4662     aj    = a->j + ai[arow];
4663     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4664     bnzi += nlnk;
4665     /* add received col data into lnk */
4666     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4667       if (i == *nextrow[k]) { /* i-th row */
4668         anzi  = *(nextai[k]+1) - *nextai[k];
4669         aj    = buf_rj[k] + *nextai[k];
4670         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4671         bnzi += nlnk;
4672         nextrow[k]++; nextai[k]++;
4673       }
4674     }
4675     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4676 
4677     /* if free space is not available, make more free space */
4678     if (current_space->local_remaining<bnzi) {
4679       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4680       nspacedouble++;
4681     }
4682     /* copy data into free space, then initialize lnk */
4683     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4684     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4685 
4686     current_space->array           += bnzi;
4687     current_space->local_used      += bnzi;
4688     current_space->local_remaining -= bnzi;
4689 
4690     bi[i+1] = bi[i] + bnzi;
4691   }
4692 
4693   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4694 
4695   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4696   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4697   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4698 
4699   /* create symbolic parallel matrix B_mpi */
4700   /*---------------------------------------*/
4701   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4702   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4703   if (n==PETSC_DECIDE) {
4704     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4705   } else {
4706     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4707   }
4708   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4709   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4710   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4711   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4712   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4713 
4714   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4715   B_mpi->assembled    = PETSC_FALSE;
4716   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4717   merge->bi           = bi;
4718   merge->bj           = bj;
4719   merge->buf_ri       = buf_ri;
4720   merge->buf_rj       = buf_rj;
4721   merge->coi          = NULL;
4722   merge->coj          = NULL;
4723   merge->owners_co    = NULL;
4724 
4725   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4726 
4727   /* attach the supporting struct to B_mpi for reuse */
4728   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4729   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4730   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4731   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4732   *mpimat = B_mpi;
4733 
4734   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4735   PetscFunctionReturn(0);
4736 }
4737 
4738 /*@C
4739       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4740                  matrices from each processor
4741 
4742     Collective on MPI_Comm
4743 
4744    Input Parameters:
4745 +    comm - the communicators the parallel matrix will live on
4746 .    seqmat - the input sequential matrices
4747 .    m - number of local rows (or PETSC_DECIDE)
4748 .    n - number of local columns (or PETSC_DECIDE)
4749 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4750 
4751    Output Parameter:
4752 .    mpimat - the parallel matrix generated
4753 
4754     Level: advanced
4755 
4756    Notes:
4757      The dimensions of the sequential matrix in each processor MUST be the same.
4758      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4759      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4760 @*/
4761 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4762 {
4763   PetscErrorCode ierr;
4764   PetscMPIInt    size;
4765 
4766   PetscFunctionBegin;
4767   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4768   if (size == 1) {
4769     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4770     if (scall == MAT_INITIAL_MATRIX) {
4771       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4772     } else {
4773       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4774     }
4775     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4776     PetscFunctionReturn(0);
4777   }
4778   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4779   if (scall == MAT_INITIAL_MATRIX) {
4780     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4781   }
4782   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4783   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4784   PetscFunctionReturn(0);
4785 }
4786 
4787 /*@
4788      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4789           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4790           with MatGetSize()
4791 
4792     Not Collective
4793 
4794    Input Parameters:
4795 +    A - the matrix
4796 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4797 
4798    Output Parameter:
4799 .    A_loc - the local sequential matrix generated
4800 
4801     Level: developer
4802 
4803 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4804 
4805 @*/
4806 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4807 {
4808   PetscErrorCode ierr;
4809   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4810   Mat_SeqAIJ     *mat,*a,*b;
4811   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4812   MatScalar      *aa,*ba,*cam;
4813   PetscScalar    *ca;
4814   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4815   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4816   PetscBool      match;
4817   MPI_Comm       comm;
4818   PetscMPIInt    size;
4819 
4820   PetscFunctionBegin;
4821   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4822   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4823   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4824   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4825   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4826 
4827   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4828   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4829   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4830   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4831   aa = a->a; ba = b->a;
4832   if (scall == MAT_INITIAL_MATRIX) {
4833     if (size == 1) {
4834       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4835       PetscFunctionReturn(0);
4836     }
4837 
4838     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4839     ci[0] = 0;
4840     for (i=0; i<am; i++) {
4841       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4842     }
4843     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4844     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4845     k    = 0;
4846     for (i=0; i<am; i++) {
4847       ncols_o = bi[i+1] - bi[i];
4848       ncols_d = ai[i+1] - ai[i];
4849       /* off-diagonal portion of A */
4850       for (jo=0; jo<ncols_o; jo++) {
4851         col = cmap[*bj];
4852         if (col >= cstart) break;
4853         cj[k]   = col; bj++;
4854         ca[k++] = *ba++;
4855       }
4856       /* diagonal portion of A */
4857       for (j=0; j<ncols_d; j++) {
4858         cj[k]   = cstart + *aj++;
4859         ca[k++] = *aa++;
4860       }
4861       /* off-diagonal portion of A */
4862       for (j=jo; j<ncols_o; j++) {
4863         cj[k]   = cmap[*bj++];
4864         ca[k++] = *ba++;
4865       }
4866     }
4867     /* put together the new matrix */
4868     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4869     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4870     /* Since these are PETSc arrays, change flags to free them as necessary. */
4871     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4872     mat->free_a  = PETSC_TRUE;
4873     mat->free_ij = PETSC_TRUE;
4874     mat->nonew   = 0;
4875   } else if (scall == MAT_REUSE_MATRIX) {
4876     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4877     ci = mat->i; cj = mat->j; cam = mat->a;
4878     for (i=0; i<am; i++) {
4879       /* off-diagonal portion of A */
4880       ncols_o = bi[i+1] - bi[i];
4881       for (jo=0; jo<ncols_o; jo++) {
4882         col = cmap[*bj];
4883         if (col >= cstart) break;
4884         *cam++ = *ba++; bj++;
4885       }
4886       /* diagonal portion of A */
4887       ncols_d = ai[i+1] - ai[i];
4888       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4889       /* off-diagonal portion of A */
4890       for (j=jo; j<ncols_o; j++) {
4891         *cam++ = *ba++; bj++;
4892       }
4893     }
4894   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4895   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4896   PetscFunctionReturn(0);
4897 }
4898 
4899 /*@C
4900      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4901 
4902     Not Collective
4903 
4904    Input Parameters:
4905 +    A - the matrix
4906 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4907 -    row, col - index sets of rows and columns to extract (or NULL)
4908 
4909    Output Parameter:
4910 .    A_loc - the local sequential matrix generated
4911 
4912     Level: developer
4913 
4914 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4915 
4916 @*/
4917 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4918 {
4919   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4920   PetscErrorCode ierr;
4921   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4922   IS             isrowa,iscola;
4923   Mat            *aloc;
4924   PetscBool      match;
4925 
4926   PetscFunctionBegin;
4927   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4928   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4929   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4930   if (!row) {
4931     start = A->rmap->rstart; end = A->rmap->rend;
4932     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4933   } else {
4934     isrowa = *row;
4935   }
4936   if (!col) {
4937     start = A->cmap->rstart;
4938     cmap  = a->garray;
4939     nzA   = a->A->cmap->n;
4940     nzB   = a->B->cmap->n;
4941     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4942     ncols = 0;
4943     for (i=0; i<nzB; i++) {
4944       if (cmap[i] < start) idx[ncols++] = cmap[i];
4945       else break;
4946     }
4947     imark = i;
4948     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4949     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4950     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4951   } else {
4952     iscola = *col;
4953   }
4954   if (scall != MAT_INITIAL_MATRIX) {
4955     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4956     aloc[0] = *A_loc;
4957   }
4958   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4959   *A_loc = aloc[0];
4960   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4961   if (!row) {
4962     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4963   }
4964   if (!col) {
4965     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4966   }
4967   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4968   PetscFunctionReturn(0);
4969 }
4970 
4971 /*@C
4972     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4973 
4974     Collective on Mat
4975 
4976    Input Parameters:
4977 +    A,B - the matrices in mpiaij format
4978 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4979 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4980 
4981    Output Parameter:
4982 +    rowb, colb - index sets of rows and columns of B to extract
4983 -    B_seq - the sequential matrix generated
4984 
4985     Level: developer
4986 
4987 @*/
4988 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4989 {
4990   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4991   PetscErrorCode ierr;
4992   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4993   IS             isrowb,iscolb;
4994   Mat            *bseq=NULL;
4995 
4996   PetscFunctionBegin;
4997   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4998     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4999   }
5000   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5001 
5002   if (scall == MAT_INITIAL_MATRIX) {
5003     start = A->cmap->rstart;
5004     cmap  = a->garray;
5005     nzA   = a->A->cmap->n;
5006     nzB   = a->B->cmap->n;
5007     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5008     ncols = 0;
5009     for (i=0; i<nzB; i++) {  /* row < local row index */
5010       if (cmap[i] < start) idx[ncols++] = cmap[i];
5011       else break;
5012     }
5013     imark = i;
5014     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5015     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5016     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5017     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5018   } else {
5019     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5020     isrowb  = *rowb; iscolb = *colb;
5021     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5022     bseq[0] = *B_seq;
5023   }
5024   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5025   *B_seq = bseq[0];
5026   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5027   if (!rowb) {
5028     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5029   } else {
5030     *rowb = isrowb;
5031   }
5032   if (!colb) {
5033     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5034   } else {
5035     *colb = iscolb;
5036   }
5037   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5038   PetscFunctionReturn(0);
5039 }
5040 
5041 /*
5042     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5043     of the OFF-DIAGONAL portion of local A
5044 
5045     Collective on Mat
5046 
5047    Input Parameters:
5048 +    A,B - the matrices in mpiaij format
5049 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5050 
5051    Output Parameter:
5052 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5053 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5054 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5055 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5056 
5057     Level: developer
5058 
5059 */
5060 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5061 {
5062   VecScatter_MPI_General *gen_to,*gen_from;
5063   PetscErrorCode         ierr;
5064   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5065   Mat_SeqAIJ             *b_oth;
5066   VecScatter             ctx =a->Mvctx;
5067   MPI_Comm               comm;
5068   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5069   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5070   PetscInt               *rvalues,*svalues;
5071   MatScalar              *b_otha,*bufa,*bufA;
5072   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5073   MPI_Request            *rwaits = NULL,*swaits = NULL;
5074   MPI_Status             *sstatus,rstatus;
5075   PetscMPIInt            jj,size;
5076   PetscInt               *cols,sbs,rbs;
5077   PetscScalar            *vals;
5078 
5079   PetscFunctionBegin;
5080   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5081   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5082 
5083   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5084     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5085   }
5086   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5087   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5088 
5089   if (size == 1) {
5090     startsj_s = NULL;
5091     bufa_ptr  = NULL;
5092     *B_oth    = NULL;
5093     PetscFunctionReturn(0);
5094   }
5095 
5096   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5097   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5098   nrecvs   = gen_from->n;
5099   nsends   = gen_to->n;
5100 
5101   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5102   srow    = gen_to->indices;    /* local row index to be sent */
5103   sstarts = gen_to->starts;
5104   sprocs  = gen_to->procs;
5105   sstatus = gen_to->sstatus;
5106   sbs     = gen_to->bs;
5107   rstarts = gen_from->starts;
5108   rprocs  = gen_from->procs;
5109   rbs     = gen_from->bs;
5110 
5111   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5112   if (scall == MAT_INITIAL_MATRIX) {
5113     /* i-array */
5114     /*---------*/
5115     /*  post receives */
5116     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5117     for (i=0; i<nrecvs; i++) {
5118       rowlen = rvalues + rstarts[i]*rbs;
5119       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5120       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5121     }
5122 
5123     /* pack the outgoing message */
5124     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5125 
5126     sstartsj[0] = 0;
5127     rstartsj[0] = 0;
5128     len         = 0; /* total length of j or a array to be sent */
5129     k           = 0;
5130     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5131     for (i=0; i<nsends; i++) {
5132       rowlen = svalues + sstarts[i]*sbs;
5133       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5134       for (j=0; j<nrows; j++) {
5135         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5136         for (l=0; l<sbs; l++) {
5137           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5138 
5139           rowlen[j*sbs+l] = ncols;
5140 
5141           len += ncols;
5142           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5143         }
5144         k++;
5145       }
5146       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5147 
5148       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5149     }
5150     /* recvs and sends of i-array are completed */
5151     i = nrecvs;
5152     while (i--) {
5153       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5154     }
5155     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5156     ierr = PetscFree(svalues);CHKERRQ(ierr);
5157 
5158     /* allocate buffers for sending j and a arrays */
5159     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5160     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5161 
5162     /* create i-array of B_oth */
5163     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5164 
5165     b_othi[0] = 0;
5166     len       = 0; /* total length of j or a array to be received */
5167     k         = 0;
5168     for (i=0; i<nrecvs; i++) {
5169       rowlen = rvalues + rstarts[i]*rbs;
5170       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5171       for (j=0; j<nrows; j++) {
5172         b_othi[k+1] = b_othi[k] + rowlen[j];
5173         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5174         k++;
5175       }
5176       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5177     }
5178     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5179 
5180     /* allocate space for j and a arrrays of B_oth */
5181     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5182     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5183 
5184     /* j-array */
5185     /*---------*/
5186     /*  post receives of j-array */
5187     for (i=0; i<nrecvs; i++) {
5188       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5189       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5190     }
5191 
5192     /* pack the outgoing message j-array */
5193     k = 0;
5194     for (i=0; i<nsends; i++) {
5195       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5196       bufJ  = bufj+sstartsj[i];
5197       for (j=0; j<nrows; j++) {
5198         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5199         for (ll=0; ll<sbs; ll++) {
5200           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5201           for (l=0; l<ncols; l++) {
5202             *bufJ++ = cols[l];
5203           }
5204           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5205         }
5206       }
5207       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5208     }
5209 
5210     /* recvs and sends of j-array are completed */
5211     i = nrecvs;
5212     while (i--) {
5213       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5214     }
5215     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5216   } else if (scall == MAT_REUSE_MATRIX) {
5217     sstartsj = *startsj_s;
5218     rstartsj = *startsj_r;
5219     bufa     = *bufa_ptr;
5220     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5221     b_otha   = b_oth->a;
5222   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5223 
5224   /* a-array */
5225   /*---------*/
5226   /*  post receives of a-array */
5227   for (i=0; i<nrecvs; i++) {
5228     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5229     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5230   }
5231 
5232   /* pack the outgoing message a-array */
5233   k = 0;
5234   for (i=0; i<nsends; i++) {
5235     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5236     bufA  = bufa+sstartsj[i];
5237     for (j=0; j<nrows; j++) {
5238       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5239       for (ll=0; ll<sbs; ll++) {
5240         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5241         for (l=0; l<ncols; l++) {
5242           *bufA++ = vals[l];
5243         }
5244         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5245       }
5246     }
5247     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5248   }
5249   /* recvs and sends of a-array are completed */
5250   i = nrecvs;
5251   while (i--) {
5252     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5253   }
5254   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5255   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5256 
5257   if (scall == MAT_INITIAL_MATRIX) {
5258     /* put together the new matrix */
5259     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5260 
5261     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5262     /* Since these are PETSc arrays, change flags to free them as necessary. */
5263     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5264     b_oth->free_a  = PETSC_TRUE;
5265     b_oth->free_ij = PETSC_TRUE;
5266     b_oth->nonew   = 0;
5267 
5268     ierr = PetscFree(bufj);CHKERRQ(ierr);
5269     if (!startsj_s || !bufa_ptr) {
5270       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5271       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5272     } else {
5273       *startsj_s = sstartsj;
5274       *startsj_r = rstartsj;
5275       *bufa_ptr  = bufa;
5276     }
5277   }
5278   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5279   PetscFunctionReturn(0);
5280 }
5281 
5282 /*@C
5283   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5284 
5285   Not Collective
5286 
5287   Input Parameters:
5288 . A - The matrix in mpiaij format
5289 
5290   Output Parameter:
5291 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5292 . colmap - A map from global column index to local index into lvec
5293 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5294 
5295   Level: developer
5296 
5297 @*/
5298 #if defined(PETSC_USE_CTABLE)
5299 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5300 #else
5301 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5302 #endif
5303 {
5304   Mat_MPIAIJ *a;
5305 
5306   PetscFunctionBegin;
5307   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5308   PetscValidPointer(lvec, 2);
5309   PetscValidPointer(colmap, 3);
5310   PetscValidPointer(multScatter, 4);
5311   a = (Mat_MPIAIJ*) A->data;
5312   if (lvec) *lvec = a->lvec;
5313   if (colmap) *colmap = a->colmap;
5314   if (multScatter) *multScatter = a->Mvctx;
5315   PetscFunctionReturn(0);
5316 }
5317 
5318 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5319 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5320 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5321 #if defined(PETSC_HAVE_ELEMENTAL)
5322 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5323 #endif
5324 #if defined(PETSC_HAVE_HYPRE)
5325 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5326 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5327 #endif
5328 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5329 
5330 /*
5331     Computes (B'*A')' since computing B*A directly is untenable
5332 
5333                n                       p                          p
5334         (              )       (              )         (                  )
5335       m (      A       )  *  n (       B      )   =   m (         C        )
5336         (              )       (              )         (                  )
5337 
5338 */
5339 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5340 {
5341   PetscErrorCode ierr;
5342   Mat            At,Bt,Ct;
5343 
5344   PetscFunctionBegin;
5345   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5346   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5347   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5348   ierr = MatDestroy(&At);CHKERRQ(ierr);
5349   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5350   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5351   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5352   PetscFunctionReturn(0);
5353 }
5354 
5355 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5356 {
5357   PetscErrorCode ierr;
5358   PetscInt       m=A->rmap->n,n=B->cmap->n;
5359   Mat            Cmat;
5360 
5361   PetscFunctionBegin;
5362   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5363   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5364   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5365   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5366   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5367   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5368   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5369   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5370 
5371   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5372 
5373   *C = Cmat;
5374   PetscFunctionReturn(0);
5375 }
5376 
5377 /* ----------------------------------------------------------------*/
5378 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5379 {
5380   PetscErrorCode ierr;
5381 
5382   PetscFunctionBegin;
5383   if (scall == MAT_INITIAL_MATRIX) {
5384     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5385     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5386     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5387   }
5388   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5389   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5390   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5391   PetscFunctionReturn(0);
5392 }
5393 
5394 /*MC
5395    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5396 
5397    Options Database Keys:
5398 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5399 
5400   Level: beginner
5401 
5402 .seealso: MatCreateAIJ()
5403 M*/
5404 
5405 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5406 {
5407   Mat_MPIAIJ     *b;
5408   PetscErrorCode ierr;
5409   PetscMPIInt    size;
5410 
5411   PetscFunctionBegin;
5412   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5413 
5414   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5415   B->data       = (void*)b;
5416   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5417   B->assembled  = PETSC_FALSE;
5418   B->insertmode = NOT_SET_VALUES;
5419   b->size       = size;
5420 
5421   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5422 
5423   /* build cache for off array entries formed */
5424   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5425 
5426   b->donotstash  = PETSC_FALSE;
5427   b->colmap      = 0;
5428   b->garray      = 0;
5429   b->roworiented = PETSC_TRUE;
5430 
5431   /* stuff used for matrix vector multiply */
5432   b->lvec  = NULL;
5433   b->Mvctx = NULL;
5434 
5435   /* stuff for MatGetRow() */
5436   b->rowindices   = 0;
5437   b->rowvalues    = 0;
5438   b->getrowactive = PETSC_FALSE;
5439 
5440   /* flexible pointer used in CUSP/CUSPARSE classes */
5441   b->spptr = NULL;
5442 
5443   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5444   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5445   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5446   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5447   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5448   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5449   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5450   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5451   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5452   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5453 #if defined(PETSC_HAVE_ELEMENTAL)
5454   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5455 #endif
5456 #if defined(PETSC_HAVE_HYPRE)
5457   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5458 #endif
5459   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5460   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5461   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5462   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5463 #if defined(PETSC_HAVE_HYPRE)
5464   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5465 #endif
5466   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5467   PetscFunctionReturn(0);
5468 }
5469 
5470 /*@C
5471      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5472          and "off-diagonal" part of the matrix in CSR format.
5473 
5474    Collective on MPI_Comm
5475 
5476    Input Parameters:
5477 +  comm - MPI communicator
5478 .  m - number of local rows (Cannot be PETSC_DECIDE)
5479 .  n - This value should be the same as the local size used in creating the
5480        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5481        calculated if N is given) For square matrices n is almost always m.
5482 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5483 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5484 .   i - row indices for "diagonal" portion of matrix
5485 .   j - column indices
5486 .   a - matrix values
5487 .   oi - row indices for "off-diagonal" portion of matrix
5488 .   oj - column indices
5489 -   oa - matrix values
5490 
5491    Output Parameter:
5492 .   mat - the matrix
5493 
5494    Level: advanced
5495 
5496    Notes:
5497        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5498        must free the arrays once the matrix has been destroyed and not before.
5499 
5500        The i and j indices are 0 based
5501 
5502        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5503 
5504        This sets local rows and cannot be used to set off-processor values.
5505 
5506        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5507        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5508        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5509        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5510        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5511        communication if it is known that only local entries will be set.
5512 
5513 .keywords: matrix, aij, compressed row, sparse, parallel
5514 
5515 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5516           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5517 @*/
5518 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5519 {
5520   PetscErrorCode ierr;
5521   Mat_MPIAIJ     *maij;
5522 
5523   PetscFunctionBegin;
5524   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5525   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5526   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5527   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5528   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5529   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5530   maij = (Mat_MPIAIJ*) (*mat)->data;
5531 
5532   (*mat)->preallocated = PETSC_TRUE;
5533 
5534   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5535   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5536 
5537   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5538   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5539 
5540   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5541   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5542   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5543   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5544 
5545   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5546   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5547   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5548   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5549   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5550   PetscFunctionReturn(0);
5551 }
5552 
5553 /*
5554     Special version for direct calls from Fortran
5555 */
5556 #include <petsc/private/fortranimpl.h>
5557 
5558 /* Change these macros so can be used in void function */
5559 #undef CHKERRQ
5560 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5561 #undef SETERRQ2
5562 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5563 #undef SETERRQ3
5564 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5565 #undef SETERRQ
5566 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5567 
5568 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5569 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5570 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5571 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5572 #else
5573 #endif
5574 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5575 {
5576   Mat            mat  = *mmat;
5577   PetscInt       m    = *mm, n = *mn;
5578   InsertMode     addv = *maddv;
5579   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5580   PetscScalar    value;
5581   PetscErrorCode ierr;
5582 
5583   MatCheckPreallocated(mat,1);
5584   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5585 
5586 #if defined(PETSC_USE_DEBUG)
5587   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5588 #endif
5589   {
5590     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5591     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5592     PetscBool roworiented = aij->roworiented;
5593 
5594     /* Some Variables required in the macro */
5595     Mat        A                 = aij->A;
5596     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5597     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5598     MatScalar  *aa               = a->a;
5599     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5600     Mat        B                 = aij->B;
5601     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5602     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5603     MatScalar  *ba               = b->a;
5604 
5605     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5606     PetscInt  nonew = a->nonew;
5607     MatScalar *ap1,*ap2;
5608 
5609     PetscFunctionBegin;
5610     for (i=0; i<m; i++) {
5611       if (im[i] < 0) continue;
5612 #if defined(PETSC_USE_DEBUG)
5613       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5614 #endif
5615       if (im[i] >= rstart && im[i] < rend) {
5616         row      = im[i] - rstart;
5617         lastcol1 = -1;
5618         rp1      = aj + ai[row];
5619         ap1      = aa + ai[row];
5620         rmax1    = aimax[row];
5621         nrow1    = ailen[row];
5622         low1     = 0;
5623         high1    = nrow1;
5624         lastcol2 = -1;
5625         rp2      = bj + bi[row];
5626         ap2      = ba + bi[row];
5627         rmax2    = bimax[row];
5628         nrow2    = bilen[row];
5629         low2     = 0;
5630         high2    = nrow2;
5631 
5632         for (j=0; j<n; j++) {
5633           if (roworiented) value = v[i*n+j];
5634           else value = v[i+j*m];
5635           if (in[j] >= cstart && in[j] < cend) {
5636             col = in[j] - cstart;
5637             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5638             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5639           } else if (in[j] < 0) continue;
5640 #if defined(PETSC_USE_DEBUG)
5641           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5642 #endif
5643           else {
5644             if (mat->was_assembled) {
5645               if (!aij->colmap) {
5646                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5647               }
5648 #if defined(PETSC_USE_CTABLE)
5649               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5650               col--;
5651 #else
5652               col = aij->colmap[in[j]] - 1;
5653 #endif
5654               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5655               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5656                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5657                 col  =  in[j];
5658                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5659                 B     = aij->B;
5660                 b     = (Mat_SeqAIJ*)B->data;
5661                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5662                 rp2   = bj + bi[row];
5663                 ap2   = ba + bi[row];
5664                 rmax2 = bimax[row];
5665                 nrow2 = bilen[row];
5666                 low2  = 0;
5667                 high2 = nrow2;
5668                 bm    = aij->B->rmap->n;
5669                 ba    = b->a;
5670               }
5671             } else col = in[j];
5672             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5673           }
5674         }
5675       } else if (!aij->donotstash) {
5676         if (roworiented) {
5677           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5678         } else {
5679           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5680         }
5681       }
5682     }
5683   }
5684   PetscFunctionReturnVoid();
5685 }
5686 
5687