xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5a856986583887c326abe5dfd149e8184a29cd80)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
15   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16   for communicators controlling multiple processes.  It is recommended that you call both of
17   the above preallocation routines for simplicity.
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the AIJ type also automatically switches over to using inodes when
24    enough of them exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
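/*
   A minimal usage sketch (not compiled as part of this file; error checking with
   CHKERRQ() omitted, and the global sizes M and N are placeholders): create an AIJ
   matrix and, as recommended above, call both preallocation routines so the same
   code works with one or with many MPI processes.

     Mat mat;
     MatCreate(PETSC_COMM_WORLD,&mat);
     MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(mat,MATAIJ);
     MatSeqAIJSetPreallocation(mat,5,NULL);
     MatMPIAIJSetPreallocation(mat,5,NULL,2,NULL);
*/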
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
45 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
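/*
   A minimal sketch: the type is usually selected at runtime by calling
   MatSetFromOptions(mat) together with the option -mat_type aijcrl, or it can be set
   directly with MatSetType(mat,MATAIJCRL).
*/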
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
62 {
63   PetscErrorCode  ierr;
64   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
65   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
66   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
67   const PetscInt  *ia,*ib;
68   const MatScalar *aa,*bb;
69   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
70   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
71 
72   PetscFunctionBegin;
73   *keptrows = 0;
74   ia        = a->i;
75   ib        = b->i;
76   for (i=0; i<m; i++) {
77     na = ia[i+1] - ia[i];
78     nb = ib[i+1] - ib[i];
79     if (!na && !nb) {
80       cnt++;
81       goto ok1;
82     }
83     aa = a->a + ia[i];
84     for (j=0; j<na; j++) {
85       if (aa[j] != 0.0) goto ok1;
86     }
87     bb = b->a + ib[i];
88     for (j=0; j <nb; j++) {
89       if (bb[j] != 0.0) goto ok1;
90     }
91     cnt++;
92 ok1:;
93   }
94   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
95   if (!n0rows) PetscFunctionReturn(0);
96   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
97   cnt  = 0;
98   for (i=0; i<m; i++) {
99     na = ia[i+1] - ia[i];
100     nb = ib[i+1] - ib[i];
101     if (!na && !nb) continue;
102     aa = a->a + ia[i];
103     for (j=0; j<na;j++) {
104       if (aa[j] != 0.0) {
105         rows[cnt++] = rstart + i;
106         goto ok2;
107       }
108     }
109     bb = b->a + ib[i];
110     for (j=0; j<nb; j++) {
111       if (bb[j] != 0.0) {
112         rows[cnt++] = rstart + i;
113         goto ok2;
114       }
115     }
116 ok2:;
117   }
118   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
154 {
155   PetscErrorCode ierr;
156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
157   PetscInt       i,n,*garray = aij->garray;
158   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
159   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
160   PetscReal      *work;
161 
162   PetscFunctionBegin;
163   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
164   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
165   if (type == NORM_2) {
166     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
167       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
168     }
169     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
170       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
171     }
172   } else if (type == NORM_1) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
178     }
179   } else if (type == NORM_INFINITY) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
185     }
186 
187   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
188   if (type == NORM_INFINITY) {
189     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
190   } else {
191     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
192   }
193   ierr = PetscFree(work);CHKERRQ(ierr);
194   if (type == NORM_2) {
195     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
201 {
202   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
203   IS              sis,gis;
204   PetscErrorCode  ierr;
205   const PetscInt  *isis,*igis;
206   PetscInt        n,*iis,nsis,ngis,rstart,i;
207 
208   PetscFunctionBegin;
209   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
210   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
211   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
212   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
213   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
214   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
215 
216   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
217   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
218   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
219   n    = ngis + nsis;
220   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
221   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
222   for (i=0; i<n; i++) iis[i] += rstart;
223   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
224 
225   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
226   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
227   ierr = ISDestroy(&sis);CHKERRQ(ierr);
228   ierr = ISDestroy(&gis);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
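/*
   A minimal calling sketch (gseq is a hypothetical sequential matrix that is only
   meaningful on rank 0, mlocal the number of rows this rank should own; error
   checking omitted):

     Mat dist;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);
     ...
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);   on reuse, only the numerical values are moved from rank 0
*/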
240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
241 {
242   PetscMPIInt    rank,size;
243   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
244   PetscErrorCode ierr;
245   Mat            mat;
246   Mat_SeqAIJ     *gmata;
247   PetscMPIInt    tag;
248   MPI_Status     status;
249   PetscBool      aij;
250   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
251 
252   PetscFunctionBegin;
253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
257     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
258   }
259   if (reuse == MAT_INITIAL_MATRIX) {
260     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
261     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
262     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
263     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
264     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
265     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
266     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
267     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
268     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
269 
270     rowners[0] = 0;
271     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
272     rstart = rowners[rank];
273     rend   = rowners[rank+1];
274     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
275     if (!rank) {
276       gmata = (Mat_SeqAIJ*) gmat->data;
277       /* send row lengths to all processors */
278       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
279       for (i=1; i<size; i++) {
280         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281       }
282       /* determine the numbers of diagonal and off-diagonal entries in each row */
283       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
284       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
285       jj   = 0;
286       for (i=0; i<m; i++) {
287         for (j=0; j<dlens[i]; j++) {
288           if (gmata->j[jj] < rstart) ld[i]++;
289           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
290           jj++;
291         }
292       }
293       /* send column indices to other processes */
294       for (i=1; i<size; i++) {
295         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
296         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298       }
299 
300       /* send numerical values to other processes */
301       for (i=1; i<size; i++) {
302         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
303         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
304       }
305       gmataa = gmata->a;
306       gmataj = gmata->j;
307 
308     } else {
309       /* receive row lengths */
310       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* receive column indices */
312       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
313       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
314       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
315       /* determine the numbers of diagonal and off-diagonal entries in each row */
316       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
317       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
318       jj   = 0;
319       for (i=0; i<m; i++) {
320         for (j=0; j<dlens[i]; j++) {
321           if (gmataj[jj] < rstart) ld[i]++;
322           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
323           jj++;
324         }
325       }
326       /* receive numerical values */
327       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
328       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
329     }
330     /* set preallocation */
331     for (i=0; i<m; i++) {
332       dlens[i] -= olens[i];
333     }
334     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
335     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
336 
337     for (i=0; i<m; i++) {
338       dlens[i] += olens[i];
339     }
340     cnt = 0;
341     for (i=0; i<m; i++) {
342       row  = rstart + i;
343       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
344       cnt += dlens[i];
345     }
346     if (rank) {
347       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
348     }
349     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
350     ierr = PetscFree(rowners);CHKERRQ(ierr);
351 
352     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
353 
354     *inmat = mat;
355   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
356     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
357     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
358     mat  = *inmat;
359     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
360     if (!rank) {
361       /* send numerical values to other processes */
362       gmata  = (Mat_SeqAIJ*) gmat->data;
363       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
364       gmataa = gmata->a;
365       for (i=1; i<size; i++) {
366         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
367         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
368       }
369       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
370     } else {
371       /* receive numerical values from process 0 */
372       nz   = Ad->nz + Ao->nz;
373       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
374       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
375     }
376     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
377     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
378     ad = Ad->a;
379     ao = Ao->a;
380     if (mat->rmap->n) {
381       i  = 0;
382       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     for (i=1; i<mat->rmap->n; i++) {
386       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
387       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
388     }
389     i--;
390     if (mat->rmap->n) {
391       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
392     }
393     if (rank) {
394       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
395     }
396   }
397   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404 number to the local number in the off-diagonal part of the local
405 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
406 a slightly higher hash table cost; without it, it is not scalable (each processor
407 has an order N integer array) but is fast to access.
408 */
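/*
   A minimal sketch of the lookup convention used elsewhere in this file (gcol and
   lcol are hypothetical names; values are stored as local index + 1 so that 0 can
   mean "this global column is not present in the off-diagonal block"):

   #if defined(PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
      if (lcol < 0) ... gcol is not a column of B on this process ...
*/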
409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       n = aij->B->cmap->n,i;
414 
415   PetscFunctionBegin;
416   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
417 #if defined(PETSC_USE_CTABLE)
418   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
419   for (i=0; i<n; i++) {
420     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
421   }
422 #else
423   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
424   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
425   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
426 #endif
427   PetscFunctionReturn(0);
428 }
429 
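/* The two macros below insert a single value into the diagonal (A) or off-diagonal (B)
   sequential block of the MPIAIJ matrix: a short binary search narrows the window
   [low,high) within the row, a linear scan locates the column, and if the column is
   not found the later entries of the row are shifted up (reallocating with
   MatSeqXAIJReallocateAIJ() if needed) to make room for the new nonzero. */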
430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
431 { \
432     if (col <= lastcol1)  low1 = 0;     \
433     else                 high1 = nrow1; \
434     lastcol1 = col;\
435     while (high1-low1 > 5) { \
436       t = (low1+high1)/2; \
437       if (rp1[t] > col) high1 = t; \
438       else              low1  = t; \
439     } \
440       for (_i=low1; _i<high1; _i++) { \
441         if (rp1[_i] > col) break; \
442         if (rp1[_i] == col) { \
443           if (addv == ADD_VALUES) { \
444             ap1[_i] += value;   \
445             /* Not sure whether LogFlops will slow down the code or not */ \
446             (void)PetscLogFlops(1.0);   \
447            } \
448           else                    ap1[_i] = value; \
449           goto a_noinsert; \
450         } \
451       }  \
452       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
453       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
454       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
455       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
456       N = nrow1++ - 1; a->nz++; high1++; \
457       /* shift up all the later entries in this row */ \
458       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
459       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
460       rp1[_i] = col;  \
461       ap1[_i] = value;  \
462       A->nonzerostate++;\
463       a_noinsert: ; \
464       ailen[row] = nrow1; \
465 }
466 
467 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
468   { \
469     if (col <= lastcol2) low2 = 0;                        \
470     else high2 = nrow2;                                   \
471     lastcol2 = col;                                       \
472     while (high2-low2 > 5) {                              \
473       t = (low2+high2)/2;                                 \
474       if (rp2[t] > col) high2 = t;                        \
475       else             low2  = t;                         \
476     }                                                     \
477     for (_i=low2; _i<high2; _i++) {                       \
478       if (rp2[_i] > col) break;                           \
479       if (rp2[_i] == col) {                               \
480         if (addv == ADD_VALUES) {                         \
481           ap2[_i] += value;                               \
482           (void)PetscLogFlops(1.0);                       \
483         }                                                 \
484         else                    ap2[_i] = value;          \
485         goto b_noinsert;                                  \
486       }                                                   \
487     }                                                     \
488     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
489     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
490     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
491     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
492     N = nrow2++ - 1; b->nz++; high2++;                    \
493     /* shift up all the later entries in this row */      \
494     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
495     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
496     rp2[_i] = col;                                        \
497     ap2[_i] = value;                                      \
498     B->nonzerostate++;                                    \
499     b_noinsert: ;                                         \
500     bilen[row] = nrow2;                                   \
501   }
502 
503 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
504 {
505   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
506   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
507   PetscErrorCode ierr;
508   PetscInt       l,*garray = mat->garray,diag;
509 
510   PetscFunctionBegin;
511   /* code only works for square matrices A */
512 
513   /* find size of row to the left of the diagonal part */
514   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
515   row  = row - diag;
516   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
517     if (garray[b->j[b->i[row]+l]] > diag) break;
518   }
519   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
520 
521   /* diagonal part */
522   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
523 
524   /* right of diagonal part */
525   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
530 {
531   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
532   PetscScalar    value = 0.0;
533   PetscErrorCode ierr;
534   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
535   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
536   PetscBool      roworiented = aij->roworiented;
537 
538   /* Some Variables required in the macro */
539   Mat        A                 = aij->A;
540   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
541   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
542   MatScalar  *aa               = a->a;
543   PetscBool  ignorezeroentries = a->ignorezeroentries;
544   Mat        B                 = aij->B;
545   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
546   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
547   MatScalar  *ba               = b->a;
548 
549   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
550   PetscInt  nonew;
551   MatScalar *ap1,*ap2;
552 
553   PetscFunctionBegin;
554   for (i=0; i<m; i++) {
555     if (im[i] < 0) continue;
556 #if defined(PETSC_USE_DEBUG)
557     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
558 #endif
559     if (im[i] >= rstart && im[i] < rend) {
560       row      = im[i] - rstart;
561       lastcol1 = -1;
562       rp1      = aj + ai[row];
563       ap1      = aa + ai[row];
564       rmax1    = aimax[row];
565       nrow1    = ailen[row];
566       low1     = 0;
567       high1    = nrow1;
568       lastcol2 = -1;
569       rp2      = bj + bi[row];
570       ap2      = ba + bi[row];
571       rmax2    = bimax[row];
572       nrow2    = bilen[row];
573       low2     = 0;
574       high2    = nrow2;
575 
576       for (j=0; j<n; j++) {
577         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
578         if (in[j] >= cstart && in[j] < cend) {
579           col   = in[j] - cstart;
580           nonew = a->nonew;
581           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) {
614               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
615                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
616               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
617             }
618           } else col = in[j];
619           nonew = b->nonew;
620           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
621         }
622       }
623     } else {
624       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
625       if (!aij->donotstash) {
626         mat->assembled = PETSC_FALSE;
627         if (roworiented) {
628           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
629         } else {
630           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
631         }
632       }
633     }
634   }
635   PetscFunctionReturn(0);
636 }
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
640     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
641     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
642 */
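/*
   A minimal sketch of the expected input (hypothetical numbers; all rows are owned by
   this process, columns are sorted global indices, and here cstart=4, cend=8):

     PetscInt mat_i[] = {0,2,5};
     PetscInt mat_j[] = {4,7,  1,5,9};
     MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(mat,mat_j,mat_i);

   Row 0 has two diagonal-block entries; row 1 has one diagonal-block entry (5) and
   two off-diagonal entries (1 and 9).
*/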
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
646   Mat            A           = aij->A; /* diagonal part of the matrix */
647   Mat            B           = aij->B; /* offdiagonal part of the matrix */
648   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
649   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
650   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
651   PetscInt       *ailen      = a->ilen,*aj = a->j;
652   PetscInt       *bilen      = b->ilen,*bj = b->j;
653   PetscInt       am          = aij->A->rmap->n,j;
654   PetscInt       diag_so_far = 0,dnz;
655   PetscInt       offd_so_far = 0,onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j=0; j<am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(0);
676 }
677 
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
680     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
682     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
684 */
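/*
   A minimal sketch, assuming the symbolic pass above has already been performed and
   mat_a holds the values in the same CSR order as mat_j:

     MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(mat,mat_j,mat_i);
     MatSetValues_MPIAIJ_CopyFromCSRFormat(mat,mat_j,mat_i,mat_a);
*/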
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
688   Mat            A      = aij->A; /* diagonal part of the matrix */
689   Mat            B      = aij->B; /* offdiagonal part of the matrix */
690   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
691   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
692   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
693   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
694   PetscInt       *ailen = a->ilen,*aj = a->j;
695   PetscInt       *bilen = b->ilen,*bj = b->j;
696   PetscInt       am     = aij->A->rmap->n,j;
697   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
699   PetscScalar    *aa = a->a,*ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j=0; j<am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd = full_offd_i[j];
706     rowstart_diag = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag+dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd+onz_row] = mat_j[col];
716         ba[rowstart_offd+onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(0);
724 }
725 
726 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
727 {
728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
729   PetscErrorCode ierr;
730   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
731   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
732 
733   PetscFunctionBegin;
734   for (i=0; i<m; i++) {
735     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
736     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
737     if (idxm[i] >= rstart && idxm[i] < rend) {
738       row = idxm[i] - rstart;
739       for (j=0; j<n; j++) {
740         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
741         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
742         if (idxn[j] >= cstart && idxn[j] < cend) {
743           col  = idxn[j] - cstart;
744           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
745         } else {
746           if (!aij->colmap) {
747             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
748           }
749 #if defined(PETSC_USE_CTABLE)
750           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
751           col--;
752 #else
753           col = aij->colmap[idxn[j]] - 1;
754 #endif
755           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
756           else {
757             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
758           }
759         }
760       }
761     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
767 
768 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
769 {
770   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
771   PetscErrorCode ierr;
772   PetscInt       nstash,reallocs;
773 
774   PetscFunctionBegin;
775   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
776 
777   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
778   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
779   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782 
783 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
784 {
785   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
786   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
787   PetscErrorCode ierr;
788   PetscMPIInt    n;
789   PetscInt       i,j,rstart,ncols,flg;
790   PetscInt       *row,*col;
791   PetscBool      other_disassembled;
792   PetscScalar    *val;
793 
794   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
795 
796   PetscFunctionBegin;
797   if (!aij->donotstash && !mat->nooffprocentries) {
798     while (1) {
799       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
800       if (!flg) break;
801 
802       for (i=0; i<n; ) {
803         /* Now identify the consecutive vals belonging to the same row */
804         for (j=i,rstart=row[j]; j<n; j++) {
805           if (row[j] != rstart) break;
806         }
807         if (j < n) ncols = j-i;
808         else       ncols = n-i;
809         /* Now assemble all these values with a single function call */
810         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
811 
812         i = j;
813       }
814     }
815     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
816   }
817   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
818   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
819 
820   /* determine if any processor has disassembled; if so, we must
821      also disassemble ourselves, in order that we may reassemble. */
822   /*
823      if the nonzero structure of the submatrix B cannot change then we know that
824      no processor disassembled, and thus we can skip this step
825   */
826   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
827     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
828     if (mat->was_assembled && !other_disassembled) {
829       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
830     }
831   }
832   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
833     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
834   }
835   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
836   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
837   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
838 
839   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
840 
841   aij->rowvalues = 0;
842 
843   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
844   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
845 
846   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
847   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
848     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
849     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
855 {
856   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
857   PetscErrorCode ierr;
858 
859   PetscFunctionBegin;
860   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
861   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
866 {
867   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
868   PetscObjectState sA, sB;
869   PetscInt        *lrows;
870   PetscInt         r, len;
871   PetscBool        cong, lch, gch;
872   PetscErrorCode   ierr;
873 
874   PetscFunctionBegin;
875   /* get locally owned rows */
876   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
877   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
878   /* fix right hand side if needed */
879   if (x && b) {
880     const PetscScalar *xx;
881     PetscScalar       *bb;
882 
883     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
884     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
885     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
886     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
887     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
888     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
889   }
890 
891   sA = mat->A->nonzerostate;
892   sB = mat->B->nonzerostate;
893 
894   if (diag != 0.0 && cong) {
895     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
896     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
897   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
898     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
899     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
900     PetscInt   nnwA, nnwB;
901     PetscBool  nnzA, nnzB;
902 
903     nnwA = aijA->nonew;
904     nnwB = aijB->nonew;
905     nnzA = aijA->keepnonzeropattern;
906     nnzB = aijB->keepnonzeropattern;
907     if (!nnzA) {
908       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
909       aijA->nonew = 0;
910     }
911     if (!nnzB) {
912       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
913       aijB->nonew = 0;
914     }
915     /* Must zero here before the next loop */
916     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
917     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
918     for (r = 0; r < len; ++r) {
919       const PetscInt row = lrows[r] + A->rmap->rstart;
920       if (row >= A->cmap->N) continue;
921       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
922     }
923     aijA->nonew = nnwA;
924     aijB->nonew = nnwB;
925   } else {
926     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
927     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
928   }
929   ierr = PetscFree(lrows);CHKERRQ(ierr);
930   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
932 
933   /* reduce nonzerostate */
934   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
935   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
936   if (gch) A->nonzerostate++;
937   PetscFunctionReturn(0);
938 }
939 
940 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
941 {
942   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
943   PetscErrorCode    ierr;
944   PetscMPIInt       n = A->rmap->n;
945   PetscInt          i,j,r,m,p = 0,len = 0;
946   PetscInt          *lrows,*owners = A->rmap->range;
947   PetscSFNode       *rrows;
948   PetscSF           sf;
949   const PetscScalar *xx;
950   PetscScalar       *bb,*mask;
951   Vec               xmask,lmask;
952   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
953   const PetscInt    *aj, *ii,*ridx;
954   PetscScalar       *aa;
955 
956   PetscFunctionBegin;
957   /* Create SF where leaves are input rows and roots are owned rows */
958   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
959   for (r = 0; r < n; ++r) lrows[r] = -1;
960   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
961   for (r = 0; r < N; ++r) {
962     const PetscInt idx   = rows[r];
963     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
964     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
965       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
966     }
967     rrows[r].rank  = p;
968     rrows[r].index = rows[r] - owners[p];
969   }
970   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
971   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
972   /* Collect flags for rows to be zeroed */
973   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
974   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
975   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
976   /* Compress and put in row numbers */
977   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
978   /* zero diagonal part of matrix */
979   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
980   /* handle off diagonal part of matrix */
981   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
982   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
983   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
984   for (i=0; i<len; i++) bb[lrows[i]] = 1;
985   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
986   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
988   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
989   if (x && b) { /* this code is buggy when the row and column layout don't match */
990     PetscBool cong;
991 
992     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
993     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
994     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
995     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
997     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
998   }
999   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1000   /* remove zeroed rows of off diagonal matrix */
1001   ii = aij->i;
1002   for (i=0; i<len; i++) {
1003     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1004   }
1005   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1006   if (aij->compressedrow.use) {
1007     m    = aij->compressedrow.nrows;
1008     ii   = aij->compressedrow.i;
1009     ridx = aij->compressedrow.rindex;
1010     for (i=0; i<m; i++) {
1011       n  = ii[i+1] - ii[i];
1012       aj = aij->j + ii[i];
1013       aa = aij->a + ii[i];
1014 
1015       for (j=0; j<n; j++) {
1016         if (PetscAbsScalar(mask[*aj])) {
1017           if (b) bb[*ridx] -= *aa*xx[*aj];
1018           *aa = 0.0;
1019         }
1020         aa++;
1021         aj++;
1022       }
1023       ridx++;
1024     }
1025   } else { /* do not use compressed row format */
1026     m = l->B->rmap->n;
1027     for (i=0; i<m; i++) {
1028       n  = ii[i+1] - ii[i];
1029       aj = aij->j + ii[i];
1030       aa = aij->a + ii[i];
1031       for (j=0; j<n; j++) {
1032         if (PetscAbsScalar(mask[*aj])) {
1033           if (b) bb[i] -= *aa*xx[*aj];
1034           *aa = 0.0;
1035         }
1036         aa++;
1037         aj++;
1038       }
1039     }
1040   }
1041   if (x && b) {
1042     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1043     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1044   }
1045   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1046   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1047   ierr = PetscFree(lrows);CHKERRQ(ierr);
1048 
1049   /* only change matrix nonzero state if pattern was allowed to be changed */
1050   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1051     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1052     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1053   }
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1058 {
1059   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1060   PetscErrorCode ierr;
1061   PetscInt       nt;
1062   VecScatter     Mvctx = a->Mvctx;
1063 
1064   PetscFunctionBegin;
1065   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1066   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1067 
1068   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1070   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1071   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1086 {
1087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1088   PetscErrorCode ierr;
1089   VecScatter     Mvctx = a->Mvctx;
1090 
1091   PetscFunctionBegin;
1092   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1093   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1094   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1095   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1096   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1101 {
1102   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1103   PetscErrorCode ierr;
1104 
1105   PetscFunctionBegin;
1106   /* do nondiagonal part */
1107   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1108   /* do local part */
1109   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1110   /* add partial results together */
1111   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1117 {
1118   MPI_Comm       comm;
1119   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1120   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1121   IS             Me,Notme;
1122   PetscErrorCode ierr;
1123   PetscInt       M,N,first,last,*notme,i;
1124   PetscBool      lf;
1125   PetscMPIInt    size;
1126 
1127   PetscFunctionBegin;
1128   /* Easy test: symmetric diagonal block */
1129   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1130   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1131   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1132   if (!*f) PetscFunctionReturn(0);
1133   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1134   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1135   if (size == 1) PetscFunctionReturn(0);
1136 
1137   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1138   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1139   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1140   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1141   for (i=0; i<first; i++) notme[i] = i;
1142   for (i=last; i<M; i++) notme[i-last+first] = i;
1143   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1144   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1145   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1146   Aoff = Aoffs[0];
1147   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1148   Boff = Boffs[0];
1149   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1150   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1151   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1152   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1153   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1154   ierr = PetscFree(notme);CHKERRQ(ierr);
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1159 {
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1168 {
1169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173   /* do nondiagonal part */
1174   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1175   /* do local part */
1176   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1177   /* add partial results together */
1178   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 /*
1184   This only works correctly for square matrices where the subblock A->A is the
1185    diagonal block
1186 */
1187 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1188 {
1189   PetscErrorCode ierr;
1190   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1191 
1192   PetscFunctionBegin;
1193   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1194   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1195   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1206   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1211 {
1212   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1213   PetscErrorCode ierr;
1214 
1215   PetscFunctionBegin;
1216 #if defined(PETSC_USE_LOG)
1217   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1218 #endif
1219   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1220   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1221   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1222   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1223 #if defined(PETSC_USE_CTABLE)
1224   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1225 #else
1226   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1227 #endif
1228   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1229   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1230   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1231   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1232   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1233   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1234   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1235 
1236   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1245 #if defined(PETSC_HAVE_ELEMENTAL)
1246   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1247 #endif
1248 #if defined(PETSC_HAVE_HYPRE)
1249   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1250   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1251 #endif
1252   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1253   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1258 {
1259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1260   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1261   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1262   PetscErrorCode ierr;
1263   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1264   int            fd;
1265   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1266   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1267   PetscScalar    *column_values;
1268   PetscInt       message_count,flowcontrolcount;
1269   FILE           *file;
1270 
1271   PetscFunctionBegin;
1272   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1273   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1274   nz   = A->nz + B->nz;
1275   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1276   if (!rank) {
1277     header[0] = MAT_FILE_CLASSID;
1278     header[1] = mat->rmap->N;
1279     header[2] = mat->cmap->N;
1280 
1281     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1283     /* get largest number of rows any processor has */
1284     rlen  = mat->rmap->n;
1285     range = mat->rmap->range;
1286     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1287   } else {
1288     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1289     rlen = mat->rmap->n;
1290   }
1291 
1292   /* load up the local row counts */
1293   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1294   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1295 
1296   /* store the row lengths to the file */
1297   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1298   if (!rank) {
1299     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1300     for (i=1; i<size; i++) {
1301       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1302       rlen = range[i+1] - range[i];
1303       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1304       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1305     }
1306     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1307   } else {
1308     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1309     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1311   }
1312   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1313 
1314   /* load up the local column indices */
1315   nzmax = nz; /* rank 0 needs enough space to hold the largest contribution from any process; other ranks only need space for their own nz */
1316   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1318   cnt   = 0;
1319   for (i=0; i<mat->rmap->n; i++) {
1320     for (j=B->i[i]; j<B->i[i+1]; j++) {
1321       if ((col = garray[B->j[j]]) > cstart) break;
1322       column_indices[cnt++] = col;
1323     }
1324     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1325     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1326   }
1327   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1328 
1329   /* store the column indices to the file */
1330   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1331   if (!rank) {
1332     MPI_Status status;
1333     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     for (i=1; i<size; i++) {
1335       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1336       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1337       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1338       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1339       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1340     }
1341     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1342   } else {
1343     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1344     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1345     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1346     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1349 
1350   /* load up the local column values */
1351   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1352   cnt  = 0;
1353   for (i=0; i<mat->rmap->n; i++) {
1354     for (j=B->i[i]; j<B->i[i+1]; j++) {
1355       if (garray[B->j[j]] > cstart) break;
1356       column_values[cnt++] = B->a[j];
1357     }
1358     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1359     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column values to the file */
1364   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1372       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1379     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_values);CHKERRQ(ierr);
1383 
1384   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1385   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #include <petscdraw.h>
1390 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1391 {
1392   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1393   PetscErrorCode    ierr;
1394   PetscMPIInt       rank = aij->rank,size = aij->size;
1395   PetscBool         isdraw,iascii,isbinary;
1396   PetscViewer       sviewer;
1397   PetscViewerFormat format;
1398 
1399   PetscFunctionBegin;
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1401   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1402   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1403   if (iascii) {
1404     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1405     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1406       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1407       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1408       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1409       for (i=0; i<(PetscInt)size; i++) {
1410         nmax = PetscMax(nmax,nz[i]);
1411         nmin = PetscMin(nmin,nz[i]);
1412         navg += nz[i];
1413       }
1414       ierr = PetscFree(nz);CHKERRQ(ierr);
1415       navg = navg/size;
1416       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1417       PetscFunctionReturn(0);
1418     }
1419     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1420     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1421       MatInfo   info;
1422       PetscBool inodes;
1423 
1424       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1425       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1426       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1427       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1428       if (!inodes) {
1429         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1430                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1431       } else {
1432         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1433                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1434       }
1435       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1436       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1437       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1438       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1439       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1440       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1441       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1442       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1443       PetscFunctionReturn(0);
1444     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1445       PetscInt inodecount,inodelimit,*inodes;
1446       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1447       if (inodes) {
1448         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1449       } else {
1450         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1451       }
1452       PetscFunctionReturn(0);
1453     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1454       PetscFunctionReturn(0);
1455     }
1456   } else if (isbinary) {
1457     if (size == 1) {
1458       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1459       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1460     } else {
1461       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1462     }
1463     PetscFunctionReturn(0);
1464   } else if (iascii && size == 1) {
1465     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1466     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1467     PetscFunctionReturn(0);
1468   } else if (isdraw) {
1469     PetscDraw draw;
1470     PetscBool isnull;
1471     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1472     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1473     if (isnull) PetscFunctionReturn(0);
1474   }
1475 
1476   { /* assemble the entire matrix onto first processor */
1477     Mat A = NULL, Av;
1478     IS  isrow,iscol;
1479 
1480     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1482     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1483     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1484 /*  The commented code uses MatCreateSubMatrices instead */
1485 /*
1486     Mat *AA, A = NULL, Av;
1487     IS  isrow,iscol;
1488 
1489     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1490     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1491     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1492     if (!rank) {
1493        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1494        A    = AA[0];
1495        Av   = AA[0];
1496     }
1497     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1498 */
1499     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1500     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1501     /*
1502        Every process must participate in the call that draws the matrix, since the graphics waits are
1503        synchronized across all processes that share the PetscDraw object
1504     */
1505     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1506     if (!rank) {
1507       if (((PetscObject)mat)->name) {
1508         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1509       }
1510       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1511     }
1512     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1513     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1514     ierr = MatDestroy(&A);CHKERRQ(ierr);
1515   }
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1520 {
1521   PetscErrorCode ierr;
1522   PetscBool      iascii,isdraw,issocket,isbinary;
1523 
1524   PetscFunctionBegin;
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1526   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1527   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1528   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1529   if (iascii || isdraw || isbinary || issocket) {
1530     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1531   }
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1536 {
1537   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1538   PetscErrorCode ierr;
1539   Vec            bb1 = 0;
1540   PetscBool      hasop;
1541 
1542   PetscFunctionBegin;
1543   if (flag == SOR_APPLY_UPPER) {
1544     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1545     PetscFunctionReturn(0);
1546   }
1547 
1548   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1549     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1550   }
1551 
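  /*
     Descriptive note on the local-sweep branches below: they implement a processor-local
     (block Jacobi) outer iteration.  In each iteration the off-process part of xx is gathered
     into mat->lvec, the right-hand side is updated as bb1 = bb - B*lvec (B is the off-diagonal
     block), and SOR is then applied to the local diagonal block A with that updated right-hand side.
  */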
1552   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1553     if (flag & SOR_ZERO_INITIAL_GUESS) {
1554       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1555       its--;
1556     }
1557 
1558     while (its--) {
1559       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1560       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1561 
1562       /* update rhs: bb1 = bb - B*x */
1563       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1564       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1565 
1566       /* local sweep */
1567       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1568     }
1569   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1570     if (flag & SOR_ZERO_INITIAL_GUESS) {
1571       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1572       its--;
1573     }
1574     while (its--) {
1575       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1576       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1577 
1578       /* update rhs: bb1 = bb - B*x */
1579       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1580       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1581 
1582       /* local sweep */
1583       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1584     }
1585   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1586     if (flag & SOR_ZERO_INITIAL_GUESS) {
1587       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1588       its--;
1589     }
1590     while (its--) {
1591       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1592       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1593 
1594       /* update rhs: bb1 = bb - B*x */
1595       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1596       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1597 
1598       /* local sweep */
1599       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1600     }
1601   } else if (flag & SOR_EISENSTAT) {
1602     Vec xx1;
1603 
1604     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1605     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1606 
1607     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609     if (!mat->diag) {
1610       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1611       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1612     }
1613     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1614     if (hasop) {
1615       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1616     } else {
1617       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1618     }
1619     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1620 
1621     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1622 
1623     /* local sweep */
1624     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1625     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1626     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1627   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1628 
1629   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1630 
1631   matin->factorerrortype = mat->A->factorerrortype;
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1636 {
1637   Mat            aA,aB,Aperm;
1638   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1639   PetscScalar    *aa,*ba;
1640   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1641   PetscSF        rowsf,sf;
1642   IS             parcolp = NULL;
1643   PetscBool      done;
1644   PetscErrorCode ierr;
1645 
1646   PetscFunctionBegin;
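  /*
     Outline (descriptive note): PetscSF communication is used to apply the inverse of the row and
     column permutations, producing index translation tables (rdest, cdest, gcdest) between the
     original and the permuted global numbering.  These are used first to compute diagonal and
     off-diagonal nonzero counts for preallocating the permuted matrix, and then to insert the
     entries with MatSetValues under the new numbering.
  */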
1647   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1648   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1649   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1650   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1651 
1652   /* Invert row permutation to find out where my rows should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1656   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1657   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1659 
1660   /* Invert column permutation to find out where my columns should go */
1661   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1662   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1663   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1664   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1665   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1666   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1667   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1668 
1669   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1670   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1671   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1672 
1673   /* Find out where my gcols should go */
1674   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1675   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1676   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1677   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1678   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1679   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1680   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1681   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1682 
1683   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1684   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1685   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1686   for (i=0; i<m; i++) {
1687     PetscInt row = rdest[i],rowner;
1688     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1689     for (j=ai[i]; j<ai[i+1]; j++) {
1690       PetscInt cowner,col = cdest[aj[j]];
1691       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1692       if (rowner == cowner) dnnz[i]++;
1693       else onnz[i]++;
1694     }
1695     for (j=bi[i]; j<bi[i+1]; j++) {
1696       PetscInt cowner,col = gcdest[bj[j]];
1697       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1698       if (rowner == cowner) dnnz[i]++;
1699       else onnz[i]++;
1700     }
1701   }
1702   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1703   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1704   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1705   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1706   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1707 
1708   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1709   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1710   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1711   for (i=0; i<m; i++) {
1712     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1713     PetscInt j0,rowlen;
1714     rowlen = ai[i+1] - ai[i];
1715     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m (the length of the repurposed acols/bcols arrays), so insert in batches of at most m */
1716       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719     rowlen = bi[i+1] - bi[i];
1720     for (j0=j=0; j<rowlen; j0=j) {
1721       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1722       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1723     }
1724   }
1725   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1726   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1727   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1728   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1729   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1730   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1731   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1732   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1733   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1734   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1735   *B = Aperm;
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1740 {
1741   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1742   PetscErrorCode ierr;
1743 
1744   PetscFunctionBegin;
1745   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1746   if (ghosts) *ghosts = aij->garray;
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1751 {
1752   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1753   Mat            A    = mat->A,B = mat->B;
1754   PetscErrorCode ierr;
1755   PetscReal      isend[5],irecv[5];
1756 
1757   PetscFunctionBegin;
1758   info->block_size = 1.0;
1759   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1762   isend[3] = info->memory;  isend[4] = info->mallocs;
1763 
1764   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1765 
1766   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1767   isend[3] += info->memory;  isend[4] += info->mallocs;
1768   if (flag == MAT_LOCAL) {
1769     info->nz_used      = isend[0];
1770     info->nz_allocated = isend[1];
1771     info->nz_unneeded  = isend[2];
1772     info->memory       = isend[3];
1773     info->mallocs      = isend[4];
1774   } else if (flag == MAT_GLOBAL_MAX) {
1775     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1776 
1777     info->nz_used      = irecv[0];
1778     info->nz_allocated = irecv[1];
1779     info->nz_unneeded  = irecv[2];
1780     info->memory       = irecv[3];
1781     info->mallocs      = irecv[4];
1782   } else if (flag == MAT_GLOBAL_SUM) {
1783     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1784 
1785     info->nz_used      = irecv[0];
1786     info->nz_allocated = irecv[1];
1787     info->nz_unneeded  = irecv[2];
1788     info->memory       = irecv[3];
1789     info->mallocs      = irecv[4];
1790   }
1791   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1792   info->fill_ratio_needed = 0;
1793   info->factor_mallocs    = 0;
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1798 {
1799   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1800   PetscErrorCode ierr;
1801 
1802   PetscFunctionBegin;
1803   switch (op) {
1804   case MAT_NEW_NONZERO_LOCATIONS:
1805   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1806   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1807   case MAT_KEEP_NONZERO_PATTERN:
1808   case MAT_NEW_NONZERO_LOCATION_ERR:
1809   case MAT_USE_INODES:
1810   case MAT_IGNORE_ZERO_ENTRIES:
1811     MatCheckPreallocated(A,1);
1812     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1813     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1814     break;
1815   case MAT_ROW_ORIENTED:
1816     MatCheckPreallocated(A,1);
1817     a->roworiented = flg;
1818 
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1821     break;
1822   case MAT_NEW_DIAGONALS:
1823   case MAT_SORTED_FULL:
1824     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1825     break;
1826   case MAT_IGNORE_OFF_PROC_ENTRIES:
1827     a->donotstash = flg;
1828     break;
1829   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1830   case MAT_SPD:
1831   case MAT_SYMMETRIC:
1832   case MAT_STRUCTURALLY_SYMMETRIC:
1833   case MAT_HERMITIAN:
1834   case MAT_SYMMETRY_ETERNAL:
1835     break;
1836   case MAT_SUBMAT_SINGLEIS:
1837     A->submat_singleis = flg;
1838     break;
1839   case MAT_STRUCTURE_ONLY:
1840     /* The option is handled directly by MatSetOption() */
1841     break;
1842   default:
1843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1849 {
1850   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1851   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1852   PetscErrorCode ierr;
1853   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1854   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1855   PetscInt       *cmap,*idx_p;
1856 
1857   PetscFunctionBegin;
1858   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1859   mat->getrowactive = PETSC_TRUE;
1860 
1861   if (!mat->rowvalues && (idx || v)) {
1862     /*
1863         allocate enough space to hold information from the longest row.
1864     */
1865     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1866     PetscInt   max = 1,tmp;
1867     for (i=0; i<matin->rmap->n; i++) {
1868       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1869       if (max < tmp) max = tmp;
1870     }
1871     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1872   }
1873 
1874   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1875   lrow = row - rstart;
1876 
1877   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1878   if (!v)   {pvA = 0; pvB = 0;}
1879   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1880   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1881   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1882   nztot = nzA + nzB;
1883 
1884   cmap = mat->garray;
1885   if (v  || idx) {
1886     if (nztot) {
1887       /* Sort by increasing column numbers, assuming A and B already sorted */
1888       PetscInt imark = -1;
1889       if (v) {
1890         *v = v_p = mat->rowvalues;
1891         for (i=0; i<nzB; i++) {
1892           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1893           else break;
1894         }
1895         imark = i;
1896         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1897         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1898       }
1899       if (idx) {
1900         *idx = idx_p = mat->rowindices;
1901         if (imark > -1) {
1902           for (i=0; i<imark; i++) {
1903             idx_p[i] = cmap[cworkB[i]];
1904           }
1905         } else {
1906           for (i=0; i<nzB; i++) {
1907             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1908             else break;
1909           }
1910           imark = i;
1911         }
1912         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1913         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1914       }
1915     } else {
1916       if (idx) *idx = 0;
1917       if (v)   *v   = 0;
1918     }
1919   }
1920   *nz  = nztot;
1921   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1922   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1927 {
1928   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1929 
1930   PetscFunctionBegin;
1931   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1932   aij->getrowactive = PETSC_FALSE;
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1937 {
1938   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1939   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1940   PetscErrorCode ierr;
1941   PetscInt       i,j,cstart = mat->cmap->rstart;
1942   PetscReal      sum = 0.0;
1943   MatScalar      *v;
1944 
1945   PetscFunctionBegin;
1946   if (aij->size == 1) {
1947     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1948   } else {
1949     if (type == NORM_FROBENIUS) {
1950       v = amat->a;
1951       for (i=0; i<amat->nz; i++) {
1952         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1953       }
1954       v = bmat->a;
1955       for (i=0; i<bmat->nz; i++) {
1956         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1957       }
1958       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1959       *norm = PetscSqrtReal(*norm);
1960       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1961     } else if (type == NORM_1) { /* max column norm */
1962       PetscReal *tmp,*tmp2;
1963       PetscInt  *jj,*garray = aij->garray;
1964       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1965       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1966       *norm = 0.0;
1967       v     = amat->a; jj = amat->j;
1968       for (j=0; j<amat->nz; j++) {
1969         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1970       }
1971       v = bmat->a; jj = bmat->j;
1972       for (j=0; j<bmat->nz; j++) {
1973         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1974       }
1975       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1976       for (j=0; j<mat->cmap->N; j++) {
1977         if (tmp2[j] > *norm) *norm = tmp2[j];
1978       }
1979       ierr = PetscFree(tmp);CHKERRQ(ierr);
1980       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1981       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1982     } else if (type == NORM_INFINITY) { /* max row norm */
1983       PetscReal ntemp = 0.0;
1984       for (j=0; j<aij->A->rmap->n; j++) {
1985         v   = amat->a + amat->i[j];
1986         sum = 0.0;
1987         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1988           sum += PetscAbsScalar(*v); v++;
1989         }
1990         v = bmat->a + bmat->i[j];
1991         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1992           sum += PetscAbsScalar(*v); v++;
1993         }
1994         if (sum > ntemp) ntemp = sum;
1995       }
1996       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1997       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1998     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2004 {
2005   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2006   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2007   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2008   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2009   PetscErrorCode  ierr;
2010   Mat             B,A_diag,*B_diag;
2011   const MatScalar *array;
2012 
2013   PetscFunctionBegin;
2014   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2015   ai = Aloc->i; aj = Aloc->j;
2016   bi = Bloc->i; bj = Bloc->j;
2017   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2018     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2019     PetscSFNode          *oloc;
2020     PETSC_UNUSED PetscSF sf;
2021 
2022     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2023     /* compute d_nnz for preallocation */
2024     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2025     for (i=0; i<ai[ma]; i++) {
2026       d_nnz[aj[i]]++;
2027     }
2028     /* compute local off-diagonal contributions */
2029     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2030     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2031     /* map those to global */
2032     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2033     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2034     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2035     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2036     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2037     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2038     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2039 
2040     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2041     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2042     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2043     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2044     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2045     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2046   } else {
2047     B    = *matout;
2048     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2049   }
2050 
2051   b           = (Mat_MPIAIJ*)B->data;
2052   A_diag      = a->A;
2053   B_diag      = &b->A;
2054   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2055   A_diag_ncol = A_diag->cmap->N;
2056   B_diag_ilen = sub_B_diag->ilen;
2057   B_diag_i    = sub_B_diag->i;
2058 
2059   /* Set ilen for diagonal of B */
2060   for (i=0; i<A_diag_ncol; i++) {
2061     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2062   }
2063 
2064   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2065   very quickly (i.e., without using MatSetValues), because all writes are local. */
2066   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2067 
2068   /* copy over the off-diagonal (B) part: local row i of a->B becomes column rstart+i of the transpose, inserted with MatSetValues */
2069   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2070   array = Bloc->a;
2071   row   = A->rmap->rstart;
2072   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2073   cols_tmp = cols;
2074   for (i=0; i<mb; i++) {
2075     ncol = bi[i+1]-bi[i];
2076     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2077     row++;
2078     array += ncol; cols_tmp += ncol;
2079   }
2080   ierr = PetscFree(cols);CHKERRQ(ierr);
2081 
2082   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2084   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2085     *matout = B;
2086   } else {
2087     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2088   }
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2093 {
2094   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2095   Mat            a    = aij->A,b = aij->B;
2096   PetscErrorCode ierr;
2097   PetscInt       s1,s2,s3;
2098 
2099   PetscFunctionBegin;
2100   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2101   if (rr) {
2102     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2103     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2104     /* Overlap communication with computation. */
2105     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2106   }
2107   if (ll) {
2108     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2109     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2110     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2111   }
2112   /* scale the diagonal block */
2113   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2114 
2115   if (rr) {
2116     /* Do a scatter end and then right scale the off-diagonal block */
2117     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2119   }
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2124 {
2125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2134 {
2135   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2136   Mat            a,b,c,d;
2137   PetscBool      flg;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   a = matA->A; b = matA->B;
2142   c = matB->A; d = matB->B;
2143 
2144   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2145   if (flg) {
2146     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2147   }
2148   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2153 {
2154   PetscErrorCode ierr;
2155   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2156   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2157 
2158   PetscFunctionBegin;
2159   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2160   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2161     /* because of the column compression in the off-processor part of the matrix a->B,
2162        the number of columns in a->B and b->B may be different, hence we cannot call
2163        the MatCopy() directly on the two parts. If need be, we can provide a more
2164        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2165        then copying the submatrices */
2166     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2167   } else {
2168     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2169     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2170   }
2171   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2176 {
2177   PetscErrorCode ierr;
2178 
2179   PetscFunctionBegin;
2180   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 /*
2185    Computes the number of nonzeros per row needed for preallocation when X and Y
2186    have different nonzero structure.
2187 */
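/*
   Example (illustrative): if, in some row, X has entries in global columns {1,4,7} and Y has entries
   in global columns {2,4,9}, the merged pattern is {1,2,4,7,9}, so nnz for that row is 5; the shared
   column 4 is counted only once.
*/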
2188 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2189 {
2190   PetscInt       i,j,k,nzx,nzy;
2191 
2192   PetscFunctionBegin;
2193   /* Set the number of nonzeros in the new matrix */
2194   for (i=0; i<m; i++) {
2195     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2196     nzx = xi[i+1] - xi[i];
2197     nzy = yi[i+1] - yi[i];
2198     nnz[i] = 0;
2199     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2200       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2201       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2202       nnz[i]++;
2203     }
2204     for (; k<nzy; k++) nnz[i]++;
2205   }
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2210 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2211 {
2212   PetscErrorCode ierr;
2213   PetscInt       m = Y->rmap->N;
2214   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2215   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2216 
2217   PetscFunctionBegin;
2218   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2223 {
2224   PetscErrorCode ierr;
2225   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2226   PetscBLASInt   bnz,one=1;
2227   Mat_SeqAIJ     *x,*y;
2228 
2229   PetscFunctionBegin;
2230   if (str == SAME_NONZERO_PATTERN) {
2231     PetscScalar alpha = a;
2232     x    = (Mat_SeqAIJ*)xx->A->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     y    = (Mat_SeqAIJ*)yy->A->data;
2235     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2236     x    = (Mat_SeqAIJ*)xx->B->data;
2237     y    = (Mat_SeqAIJ*)yy->B->data;
2238     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2239     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2240     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2241   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2242     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2243   } else {
2244     Mat      B;
2245     PetscInt *nnz_d,*nnz_o;
2246     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2247     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2248     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2249     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2250     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2251     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2252     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2254     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2255     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2256     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2257     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2259     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2260   }
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2265 
2266 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2267 {
2268 #if defined(PETSC_USE_COMPLEX)
2269   PetscErrorCode ierr;
2270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2274   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2275 #else
2276   PetscFunctionBegin;
2277 #endif
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2282 {
2283   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2284   PetscErrorCode ierr;
2285 
2286   PetscFunctionBegin;
2287   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2288   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2293 {
2294   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2295   PetscErrorCode ierr;
2296 
2297   PetscFunctionBegin;
2298   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2299   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2304 {
2305   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2306   PetscErrorCode ierr;
2307   PetscInt       i,*idxb = 0;
2308   PetscScalar    *va,*vb;
2309   Vec            vtmp;
2310 
2311   PetscFunctionBegin;
2312   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2313   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2314   if (idx) {
2315     for (i=0; i<A->rmap->n; i++) {
2316       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2317     }
2318   }
2319 
2320   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2321   if (idx) {
2322     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2323   }
2324   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2325   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2326 
2327   for (i=0; i<A->rmap->n; i++) {
2328     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2329       va[i] = vb[i];
2330       if (idx) idx[i] = a->garray[idxb[i]];
2331     }
2332   }
2333 
2334   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2335   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2336   ierr = PetscFree(idxb);CHKERRQ(ierr);
2337   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2342 {
2343   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2344   PetscErrorCode ierr;
2345   PetscInt       i,*idxb = 0;
2346   PetscScalar    *va,*vb;
2347   Vec            vtmp;
2348 
2349   PetscFunctionBegin;
2350   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2351   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2352   if (idx) {
2353     for (i=0; i<A->rmap->n; i++) {
2354       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2355     }
2356   }
2357 
2358   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2359   if (idx) {
2360     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2361   }
2362   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2363   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2364 
2365   for (i=0; i<A->rmap->n; i++) {
2366     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2367       va[i] = vb[i];
2368       if (idx) idx[i] = a->garray[idxb[i]];
2369     }
2370   }
2371 
2372   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2373   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2374   ierr = PetscFree(idxb);CHKERRQ(ierr);
2375   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2380 {
2381   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2382   PetscInt       n      = A->rmap->n;
2383   PetscInt       cstart = A->cmap->rstart;
2384   PetscInt       *cmap  = mat->garray;
2385   PetscInt       *diagIdx, *offdiagIdx;
2386   Vec            diagV, offdiagV;
2387   PetscScalar    *a, *diagA, *offdiagA;
2388   PetscInt       r;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2393   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);      /* sequential work vectors, as in MatGetRowMax_MPIAIJ */
2394   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2395   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2396   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2397   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2398   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2399   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2400   for (r = 0; r < n; ++r) {
2401     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2402       a[r]   = diagA[r];
2403       idx[r] = cstart + diagIdx[r];
2404     } else {
2405       a[r]   = offdiagA[r];
2406       idx[r] = cmap[offdiagIdx[r]];
2407     }
2408   }
2409   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2410   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2411   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2412   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2413   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2414   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2419 {
2420   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2421   PetscInt       n      = A->rmap->n;
2422   PetscInt       cstart = A->cmap->rstart;
2423   PetscInt       *cmap  = mat->garray;
2424   PetscInt       *diagIdx, *offdiagIdx;
2425   Vec            diagV, offdiagV;
2426   PetscScalar    *a, *diagA, *offdiagA;
2427   PetscInt       r;
2428   PetscErrorCode ierr;
2429 
2430   PetscFunctionBegin;
2431   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2432   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2433   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2434   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2435   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2436   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2437   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2438   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2439   for (r = 0; r < n; ++r) {
2440     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2441       a[r]   = diagA[r];
2442       idx[r] = cstart + diagIdx[r];
2443     } else {
2444       a[r]   = offdiagA[r];
2445       idx[r] = cmap[offdiagIdx[r]];
2446     }
2447   }
2448   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2450   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2451   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2452   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2453   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2458 {
2459   PetscErrorCode ierr;
2460   Mat            *dummy;
2461 
2462   PetscFunctionBegin;
2463   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2464   *newmat = *dummy;
2465   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2470 {
2471   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2472   PetscErrorCode ierr;
2473 
2474   PetscFunctionBegin;
2475   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2476   A->factorerrortype = a->A->factorerrortype;
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2481 {
2482   PetscErrorCode ierr;
2483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2484 
2485   PetscFunctionBegin;
2486   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2487   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2488   if (x->assembled) {
2489     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2490   } else {
2491     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2492   }
2493   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2494   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2499 {
2500   PetscFunctionBegin;
2501   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2502   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 /*@
2507    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2508 
2509    Collective on Mat
2510 
2511    Input Parameters:
2512 +    A - the matrix
2513 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2514 
2515  Level: advanced
2516 
2517 @*/
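/*
   Typical usage (a minimal sketch, not taken from a PETSc example; nis and isarray are
   placeholder names for the caller's index sets):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,isarray,1);CHKERRQ(ierr);   (* now uses the scalable algorithm *)

   Here A is an assembled MATMPIAIJ matrix; the same behavior can be selected from the options
   database with -mat_increase_overlap_scalable.
*/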
2518 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2519 {
2520   PetscErrorCode       ierr;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2528 {
2529   PetscErrorCode       ierr;
2530   PetscBool            sc = PETSC_FALSE,flg;
2531 
2532   PetscFunctionBegin;
2533   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2534   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2535   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2536   if (flg) {
2537     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2538   }
2539   ierr = PetscOptionsTail();CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2544 {
2545   PetscErrorCode ierr;
2546   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2547   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2548 
2549   PetscFunctionBegin;
2550   if (!Y->preallocated) {
2551     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2552   } else if (!aij->nz) {
2553     PetscInt nonew = aij->nonew;
2554     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2555     aij->nonew = nonew;
2556   }
2557   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2562 {
2563   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2568   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2569   if (d) {
2570     PetscInt rstart;
2571     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2572     *d += rstart;
2573 
2574   }
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2579 {
2580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2581   PetscErrorCode ierr;
2582 
2583   PetscFunctionBegin;
2584   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 /* -------------------------------------------------------------------*/
2589 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2590                                        MatGetRow_MPIAIJ,
2591                                        MatRestoreRow_MPIAIJ,
2592                                        MatMult_MPIAIJ,
2593                                 /* 4*/ MatMultAdd_MPIAIJ,
2594                                        MatMultTranspose_MPIAIJ,
2595                                        MatMultTransposeAdd_MPIAIJ,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*10*/ 0,
2600                                        0,
2601                                        0,
2602                                        MatSOR_MPIAIJ,
2603                                        MatTranspose_MPIAIJ,
2604                                 /*15*/ MatGetInfo_MPIAIJ,
2605                                        MatEqual_MPIAIJ,
2606                                        MatGetDiagonal_MPIAIJ,
2607                                        MatDiagonalScale_MPIAIJ,
2608                                        MatNorm_MPIAIJ,
2609                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2610                                        MatAssemblyEnd_MPIAIJ,
2611                                        MatSetOption_MPIAIJ,
2612                                        MatZeroEntries_MPIAIJ,
2613                                 /*24*/ MatZeroRows_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*29*/ MatSetUp_MPIAIJ,
2619                                        0,
2620                                        0,
2621                                        MatGetDiagonalBlock_MPIAIJ,
2622                                        0,
2623                                 /*34*/ MatDuplicate_MPIAIJ,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                 /*39*/ MatAXPY_MPIAIJ,
2629                                        MatCreateSubMatrices_MPIAIJ,
2630                                        MatIncreaseOverlap_MPIAIJ,
2631                                        MatGetValues_MPIAIJ,
2632                                        MatCopy_MPIAIJ,
2633                                 /*44*/ MatGetRowMax_MPIAIJ,
2634                                        MatScale_MPIAIJ,
2635                                        MatShift_MPIAIJ,
2636                                        MatDiagonalSet_MPIAIJ,
2637                                        MatZeroRowsColumns_MPIAIJ,
2638                                 /*49*/ MatSetRandom_MPIAIJ,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2644                                        0,
2645                                        MatSetUnfactored_MPIAIJ,
2646                                        MatPermute_MPIAIJ,
2647                                        0,
2648                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2649                                        MatDestroy_MPIAIJ,
2650                                        MatView_MPIAIJ,
2651                                        0,
2652                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2653                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2654                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2659                                        MatGetRowMinAbs_MPIAIJ,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*75*/ MatFDColoringApply_AIJ,
2665                                        MatSetFromOptions_MPIAIJ,
2666                                        0,
2667                                        0,
2668                                        MatFindZeroDiagonals_MPIAIJ,
2669                                 /*80*/ 0,
2670                                        0,
2671                                        0,
2672                                 /*83*/ MatLoad_MPIAIJ,
2673                                        MatIsSymmetric_MPIAIJ,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2679                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2680                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2681                                        MatPtAP_MPIAIJ_MPIAIJ,
2682                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2683                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                 /*99*/ 0,
2689                                        0,
2690                                        0,
2691                                        MatConjugate_MPIAIJ,
2692                                        0,
2693                                 /*104*/MatSetValuesRow_MPIAIJ,
2694                                        MatRealPart_MPIAIJ,
2695                                        MatImaginaryPart_MPIAIJ,
2696                                        0,
2697                                        0,
2698                                 /*109*/0,
2699                                        0,
2700                                        MatGetRowMin_MPIAIJ,
2701                                        0,
2702                                        MatMissingDiagonal_MPIAIJ,
2703                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2704                                        0,
2705                                        MatGetGhosts_MPIAIJ,
2706                                        0,
2707                                        0,
2708                                 /*119*/0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        MatGetMultiProcBlock_MPIAIJ,
2713                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2714                                        MatGetColumnNorms_MPIAIJ,
2715                                        MatInvertBlockDiagonal_MPIAIJ,
2716                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2717                                        MatCreateSubMatricesMPI_MPIAIJ,
2718                                 /*129*/0,
2719                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2720                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2721                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2722                                        0,
2723                                 /*134*/0,
2724                                        0,
2725                                        MatRARt_MPIAIJ_MPIAIJ,
2726                                        0,
2727                                        0,
2728                                 /*139*/MatSetBlockSizes_MPIAIJ,
2729                                        0,
2730                                        0,
2731                                        MatFDColoringSetUp_MPIXAIJ,
2732                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2733                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2734 };
2735 
2736 /* ----------------------------------------------------------------------------------------*/
2737 
2738 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2739 {
2740   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2745   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2750 {
2751   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2752   PetscErrorCode ierr;
2753 
2754   PetscFunctionBegin;
2755   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2756   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2761 {
2762   Mat_MPIAIJ     *b;
2763   PetscErrorCode ierr;
2764   PetscMPIInt    size;
2765 
2766   PetscFunctionBegin;
2767   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2768   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2769   b = (Mat_MPIAIJ*)B->data;
2770 
2771 #if defined(PETSC_USE_CTABLE)
2772   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2773 #else
2774   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2775 #endif
2776   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2777   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2778   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2779 
2780   /* Because B will have been resized we simply destroy it and create a new one each time */
2781   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2782   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2783   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2784   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2785   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2786   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2787   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2788 
2789   if (!B->preallocated) {
2790     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2791     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2792     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2793     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2794     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2795   }
2796 
2797   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2798   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2799   B->preallocated  = PETSC_TRUE;
2800   B->was_assembled = PETSC_FALSE;
2801   B->assembled     = PETSC_FALSE;
2802   PetscFunctionReturn(0);
2803 }
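/*
   A minimal caller-side sketch of the preallocation this routine implements, assuming a hypothetical
   run where each process owns 4 rows: d_nz/d_nnz size the "diagonal" block (b->A above) and
   o_nz/o_nnz size the "off-diagonal" block (b->B above); the per-row counts used here are made up.

      Mat      A;
      PetscInt d_nnz[4] = {3,3,2,2};  // per-row nonzeros in the diagonal block (assumed values)
      PetscInt o_nnz[4] = {1,0,0,1};  // per-row nonzeros in the off-diagonal block (assumed values)

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);  // ends up in the routine above
*/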
2804 
2805 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2806 {
2807   Mat_MPIAIJ     *b;
2808   PetscErrorCode ierr;
2809 
2810   PetscFunctionBegin;
2811   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2812   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2813   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2814   b = (Mat_MPIAIJ*)B->data;
2815 
2816 #if defined(PETSC_USE_CTABLE)
2817   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2818 #else
2819   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2820 #endif
2821   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2822   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2823   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2824 
2825   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2826   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2827   B->preallocated  = PETSC_TRUE;
2828   B->was_assembled = PETSC_FALSE;
2829   B->assembled = PETSC_FALSE;
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2834 {
2835   Mat            mat;
2836   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2837   PetscErrorCode ierr;
2838 
2839   PetscFunctionBegin;
2840   *newmat = 0;
2841   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2842   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2843   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2844   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2845   a       = (Mat_MPIAIJ*)mat->data;
2846 
2847   mat->factortype   = matin->factortype;
2848   mat->assembled    = PETSC_TRUE;
2849   mat->insertmode   = NOT_SET_VALUES;
2850   mat->preallocated = PETSC_TRUE;
2851 
2852   a->size         = oldmat->size;
2853   a->rank         = oldmat->rank;
2854   a->donotstash   = oldmat->donotstash;
2855   a->roworiented  = oldmat->roworiented;
2856   a->rowindices   = 0;
2857   a->rowvalues    = 0;
2858   a->getrowactive = PETSC_FALSE;
2859 
2860   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2861   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2862 
2863   if (oldmat->colmap) {
2864 #if defined(PETSC_USE_CTABLE)
2865     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2866 #else
2867     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2868     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2869     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2870 #endif
2871   } else a->colmap = 0;
2872   if (oldmat->garray) {
2873     PetscInt len;
2874     len  = oldmat->B->cmap->n;
2875     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2876     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2877     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2878   } else a->garray = 0;
2879 
2880   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2881   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2882   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2883   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2884 
2885   if (oldmat->Mvctx_mpi1) {
2886     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2887     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2888   }
2889 
2890   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2891   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2892   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2893   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2894   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2895   *newmat = mat;
2896   PetscFunctionReturn(0);
2897 }
2898 
2899 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2900 {
2901   PetscBool      isbinary, ishdf5;
2902   PetscErrorCode ierr;
2903 
2904   PetscFunctionBegin;
2905   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2906   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2907   /* force binary viewer to load .info file if it has not yet done so */
2908   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2909   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2910   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2911   if (isbinary) {
2912     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2913   } else if (ishdf5) {
2914 #if defined(PETSC_HAVE_HDF5)
2915     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2916 #else
2917     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2918 #endif
2919   } else {
2920     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2921   }
2922   PetscFunctionReturn(0);
2923 }
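/*
   A minimal sketch of reaching this routine through MatLoad(), assuming a hypothetical binary file
   "matrix.dat" previously written with MatView() on a binary viewer.

      Mat         A;
      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);              // dispatches to MatLoad_MPIAIJ() above
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/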
2924 
2925 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2926 {
2927   PetscScalar    *vals,*svals;
2928   MPI_Comm       comm;
2929   PetscErrorCode ierr;
2930   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2931   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2932   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2933   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2934   PetscInt       cend,cstart,n,*rowners;
2935   int            fd;
2936   PetscInt       bs = newMat->rmap->bs;
2937 
2938   PetscFunctionBegin;
2939   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2940   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2941   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2942   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2943   if (!rank) {
2944     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2945     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2946     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2947   }
2948 
2949   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2950   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2951   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2952   if (bs < 0) bs = 1;
2953 
2954   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2955   M    = header[1]; N = header[2];
2956 
2957   /* If global sizes are set, check if they are consistent with that given in the file */
2958   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2959   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2960 
2961   /* determine ownership of all (block) rows */
2962   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2963   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2964   else m = newMat->rmap->n; /* Set by user */
2965 
2966   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2967   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2968 
2969   /* First process needs enough room for the process with the most rows */
2970   if (!rank) {
2971     mmax = rowners[1];
2972     for (i=2; i<=size; i++) {
2973       mmax = PetscMax(mmax, rowners[i]);
2974     }
2975   } else mmax = -1;             /* unused, but compilers complain */
2976 
2977   rowners[0] = 0;
2978   for (i=2; i<=size; i++) {
2979     rowners[i] += rowners[i-1];
2980   }
2981   rstart = rowners[rank];
2982   rend   = rowners[rank+1];
2983 
2984   /* distribute row lengths to all processors */
2985   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2986   if (!rank) {
2987     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
2988     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2989     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2990     for (j=0; j<m; j++) {
2991       procsnz[0] += ourlens[j];
2992     }
2993     for (i=1; i<size; i++) {
2994       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
2995       /* calculate the number of nonzeros on each processor */
2996       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2997         procsnz[i] += rowlengths[j];
2998       }
2999       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3000     }
3001     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3002   } else {
3003     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3004   }
3005 
3006   if (!rank) {
3007     /* determine max buffer needed and allocate it */
3008     maxnz = 0;
3009     for (i=0; i<size; i++) {
3010       maxnz = PetscMax(maxnz,procsnz[i]);
3011     }
3012     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3013 
3014     /* read in my part of the matrix column indices  */
3015     nz   = procsnz[0];
3016     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3017     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3018 
3019     /* read in everyone else's and ship them off */
3020     for (i=1; i<size; i++) {
3021       nz   = procsnz[i];
3022       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3023       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3024     }
3025     ierr = PetscFree(cols);CHKERRQ(ierr);
3026   } else {
3027     /* determine buffer space needed for message */
3028     nz = 0;
3029     for (i=0; i<m; i++) {
3030       nz += ourlens[i];
3031     }
3032     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3033 
3034     /* receive message of column indices */
3035     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3036   }
3037 
3038   /* determine column ownership if matrix is not square */
3039   if (N != M) {
3040     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3041     else n = newMat->cmap->n;
3042     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3043     cstart = cend - n;
3044   } else {
3045     cstart = rstart;
3046     cend   = rend;
3047     n      = cend - cstart;
3048   }
3049 
3050   /* loop over local rows, determining the number of off-diagonal entries */
3051   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3052   jj   = 0;
3053   for (i=0; i<m; i++) {
3054     for (j=0; j<ourlens[i]; j++) {
3055       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3056       jj++;
3057     }
3058   }
3059 
3060   for (i=0; i<m; i++) {
3061     ourlens[i] -= offlens[i];
3062   }
3063   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3064 
3065   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3066 
3067   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3068 
3069   for (i=0; i<m; i++) {
3070     ourlens[i] += offlens[i];
3071   }
3072 
3073   if (!rank) {
3074     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3075 
3076     /* read in my part of the matrix numerical values  */
3077     nz   = procsnz[0];
3078     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3079 
3080     /* insert into matrix */
3081     jj      = rstart;
3082     smycols = mycols;
3083     svals   = vals;
3084     for (i=0; i<m; i++) {
3085       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3086       smycols += ourlens[i];
3087       svals   += ourlens[i];
3088       jj++;
3089     }
3090 
3091     /* read in other processors and ship out */
3092     for (i=1; i<size; i++) {
3093       nz   = procsnz[i];
3094       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3095       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3096     }
3097     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3098   } else {
3099     /* receive numeric values */
3100     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3101 
3102     /* receive message of values */
3103     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3104 
3105     /* insert into matrix */
3106     jj      = rstart;
3107     smycols = mycols;
3108     svals   = vals;
3109     for (i=0; i<m; i++) {
3110       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3111       smycols += ourlens[i];
3112       svals   += ourlens[i];
3113       jj++;
3114     }
3115   }
3116   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3117   ierr = PetscFree(vals);CHKERRQ(ierr);
3118   ierr = PetscFree(mycols);CHKERRQ(ierr);
3119   ierr = PetscFree(rowners);CHKERRQ(ierr);
3120   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3121   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3122   PetscFunctionReturn(0);
3123 }
3124 
3125 /* Not scalable because of ISAllGather() unless getting all columns. */
3126 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3127 {
3128   PetscErrorCode ierr;
3129   IS             iscol_local;
3130   PetscBool      isstride;
3131   PetscMPIInt    lisstride=0,gisstride;
3132 
3133   PetscFunctionBegin;
3134   /* check if we are grabbing all columns */
3135   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3136 
3137   if (isstride) {
3138     PetscInt  start,len,mstart,mlen;
3139     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3140     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3141     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3142     if (mstart == start && mlen-mstart == len) lisstride = 1;
3143   }
3144 
3145   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3146   if (gisstride) {
3147     PetscInt N;
3148     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3149     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3150     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3151     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3152   } else {
3153     PetscInt cbs;
3154     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3155     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3156     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3157   }
3158 
3159   *isseq = iscol_local;
3160   PetscFunctionReturn(0);
3161 }
3162 
3163 /*
3164  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3165  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3166 
3167  Input Parameters:
3168    mat - matrix
3169    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3170            i.e., mat->rstart <= isrow[i] < mat->rend
3171    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3172            i.e., mat->cstart <= iscol[i] < mat->cend
3173  Output Parameters:
3174    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3175    iscol_o - sequential column index set for retrieving mat->B
3176    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3177  */
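/*
   A small worked example of the mapping built below (with made-up sizes): assume two processes,
   mat->cmap->N = 8 with columns 0..3 owned by rank 0 and 4..7 by rank 1, and iscol selecting
   columns {1,2} on rank 0 and {5,7} on rank 1.  Then

     - x is set to (-1, 1, 2, -1 | -1, 5, -1, 7) and cmap to (-1, 0, 1, -1 | -1, 2, -1, 3),
       i.e. cmap holds each selected column's index within the submatrix;
     - scattering x and cmap with a->Mvctx hands each process the selected off-process columns of
       its B block, from which iscol_o (local column indices of B) and garray (the corresponding
       submatrix column indices) are read off.
*/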
3178 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3179 {
3180   PetscErrorCode ierr;
3181   Vec            x,cmap;
3182   const PetscInt *is_idx;
3183   PetscScalar    *xarray,*cmaparray;
3184   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3185   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3186   Mat            B=a->B;
3187   Vec            lvec=a->lvec,lcmap;
3188   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3189   MPI_Comm       comm;
3190   VecScatter     Mvctx=a->Mvctx;
3191 
3192   PetscFunctionBegin;
3193   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3194   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3195 
3196   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3197   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3198   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3199   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3200   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3201 
3202   /* Get start indices */
3203   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3204   isstart -= ncols;
3205   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3206 
3207   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3208   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3209   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3210   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3211   for (i=0; i<ncols; i++) {
3212     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3213     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3214     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3215   }
3216   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3217   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3218   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3219 
3220   /* Get iscol_d */
3221   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3222   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3223   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3224 
3225   /* Get isrow_d */
3226   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3227   rstart = mat->rmap->rstart;
3228   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3229   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3230   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3231   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3232 
3233   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3234   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3235   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3236 
3237   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3238   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3239   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3240 
3241   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3242 
3243   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3244   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3245 
3246   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3247   /* off-process column indices */
3248   count = 0;
3249   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3250   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3251 
3252   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3253   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3254   for (i=0; i<Bn; i++) {
3255     if (PetscRealPart(xarray[i]) > -1.0) {
3256       idx[count]     = i;                   /* local column index in off-diagonal part B */
3257       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3258       count++;
3259     }
3260   }
3261   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3262   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3263 
3264   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3265   /* cannot ensure iscol_o has same blocksize as iscol! */
3266 
3267   ierr = PetscFree(idx);CHKERRQ(ierr);
3268   *garray = cmap1;
3269 
3270   ierr = VecDestroy(&x);CHKERRQ(ierr);
3271   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3272   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3273   PetscFunctionReturn(0);
3274 }
3275 
3276 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3277 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3278 {
3279   PetscErrorCode ierr;
3280   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3281   Mat            M = NULL;
3282   MPI_Comm       comm;
3283   IS             iscol_d,isrow_d,iscol_o;
3284   Mat            Asub = NULL,Bsub = NULL;
3285   PetscInt       n;
3286 
3287   PetscFunctionBegin;
3288   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3289 
3290   if (call == MAT_REUSE_MATRIX) {
3291     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3292     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3293     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3294 
3295     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3296     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3297 
3298     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3299     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3300 
3301     /* Update diagonal and off-diagonal portions of submat */
3302     asub = (Mat_MPIAIJ*)(*submat)->data;
3303     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3304     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3305     if (n) {
3306       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3307     }
3308     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3309     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3310 
3311   } else { /* call == MAT_INITIAL_MATRIX */
3312     const PetscInt *garray;
3313     PetscInt        BsubN;
3314 
3315     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3316     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3317 
3318     /* Create local submatrices Asub and Bsub */
3319     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3320     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3321 
3322     /* Create submatrix M */
3323     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3324 
3325     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3326     asub = (Mat_MPIAIJ*)M->data;
3327 
3328     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3329     n = asub->B->cmap->N;
3330     if (BsubN > n) {
3331       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3332       const PetscInt *idx;
3333       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3334       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3335 
3336       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3337       j = 0;
3338       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3339       for (i=0; i<n; i++) {
3340         if (j >= BsubN) break;
3341         while (subgarray[i] > garray[j]) j++;
3342 
3343         if (subgarray[i] == garray[j]) {
3344           idx_new[i] = idx[j++];
3345         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3346       }
3347       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3348 
3349       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3350       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3351 
3352     } else if (BsubN < n) {
3353       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3354     }
3355 
3356     ierr = PetscFree(garray);CHKERRQ(ierr);
3357     *submat = M;
3358 
3359     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3360     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3361     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3362 
3363     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3364     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3365 
3366     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3367     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3368   }
3369   PetscFunctionReturn(0);
3370 }
3371 
3372 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3373 {
3374   PetscErrorCode ierr;
3375   IS             iscol_local=NULL,isrow_d;
3376   PetscInt       csize;
3377   PetscInt       n,i,j,start,end;
3378   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3379   MPI_Comm       comm;
3380 
3381   PetscFunctionBegin;
3382   /* If isrow has same processor distribution as mat,
3383      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3384   if (call == MAT_REUSE_MATRIX) {
3385     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3386     if (isrow_d) {
3387       sameRowDist  = PETSC_TRUE;
3388       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3389     } else {
3390       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3391       if (iscol_local) {
3392         sameRowDist  = PETSC_TRUE;
3393         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3394       }
3395     }
3396   } else {
3397     /* Check if isrow has same processor distribution as mat */
3398     sameDist[0] = PETSC_FALSE;
3399     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3400     if (!n) {
3401       sameDist[0] = PETSC_TRUE;
3402     } else {
3403       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3404       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3405       if (i >= start && j < end) {
3406         sameDist[0] = PETSC_TRUE;
3407       }
3408     }
3409 
3410     /* Check if iscol has same processor distribution as mat */
3411     sameDist[1] = PETSC_FALSE;
3412     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3413     if (!n) {
3414       sameDist[1] = PETSC_TRUE;
3415     } else {
3416       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3417       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3418       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3419     }
3420 
3421     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3422     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3423     sameRowDist = tsameDist[0];
3424   }
3425 
3426   if (sameRowDist) {
3427     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3428       /* isrow and iscol have same processor distribution as mat */
3429       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3430       PetscFunctionReturn(0);
3431     } else { /* sameRowDist */
3432       /* isrow has same processor distribution as mat */
3433       if (call == MAT_INITIAL_MATRIX) {
3434         PetscBool sorted;
3435         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3436         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3437         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3438         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3439 
3440         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3441         if (sorted) {
3442           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3443           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3444           PetscFunctionReturn(0);
3445         }
3446       } else { /* call == MAT_REUSE_MATRIX */
3447         IS    iscol_sub;
3448         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3449         if (iscol_sub) {
3450           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3451           PetscFunctionReturn(0);
3452         }
3453       }
3454     }
3455   }
3456 
3457   /* General case: iscol -> iscol_local which has global size of iscol */
3458   if (call == MAT_REUSE_MATRIX) {
3459     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3460     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3461   } else {
3462     if (!iscol_local) {
3463       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3464     }
3465   }
3466 
3467   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3468   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3469 
3470   if (call == MAT_INITIAL_MATRIX) {
3471     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3472     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3473   }
3474   PetscFunctionReturn(0);
3475 }
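/*
   A minimal caller-side sketch of the dispatch above through MatCreateSubMatrix(), assuming an
   assembled MATMPIAIJ matrix A and index sets that keep the rows/columns each process already owns,
   so the SameRowColDist path is taken; the MAT_REUSE_MATRIX call then reuses the index sets
   composed on sub.

      Mat      A,sub;
      IS       isrow,iscol;
      PetscInt rstart,rend,cstart,cend;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      // ... change the numerical values of A, keeping its nonzero pattern ...
      ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/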
3476 
3477 /*@C
3478      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3479          and "off-diagonal" parts of the matrix in CSR format.
3480 
3481    Collective
3482 
3483    Input Parameters:
3484 +  comm - MPI communicator
3485 .  A - "diagonal" portion of matrix
3486 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3487 -  garray - global index of B columns
3488 
3489    Output Parameter:
3490 .   mat - the matrix, with input A as its local diagonal matrix

3491    Level: advanced
3492 
3493    Notes:
3494        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3495        A becomes part of the output mat and B is destroyed by this routine; the user cannot use A or B afterwards.
3496 
3497 .seealso: MatCreateMPIAIJWithSplitArrays()
3498 @*/
3499 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3500 {
3501   PetscErrorCode ierr;
3502   Mat_MPIAIJ     *maij;
3503   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3504   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3505   PetscScalar    *oa=b->a;
3506   Mat            Bnew;
3507   PetscInt       m,n,N;
3508 
3509   PetscFunctionBegin;
3510   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3511   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3512   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3513   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3514   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3515   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3516 
3517   /* Get global columns of mat */
3518   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3519 
3520   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3521   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3522   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3523   maij = (Mat_MPIAIJ*)(*mat)->data;
3524 
3525   (*mat)->preallocated = PETSC_TRUE;
3526 
3527   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3528   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3529 
3530   /* Set A as diagonal portion of *mat */
3531   maij->A = A;
3532 
3533   nz = oi[m];
3534   for (i=0; i<nz; i++) {
3535     col   = oj[i];
3536     oj[i] = garray[col];
3537   }
3538 
3539    /* Set Bnew as off-diagonal portion of *mat */
3540   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3541   bnew        = (Mat_SeqAIJ*)Bnew->data;
3542   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3543   maij->B     = Bnew;
3544 
3545   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3546 
3547   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3548   b->free_a       = PETSC_FALSE;
3549   b->free_ij      = PETSC_FALSE;
3550   ierr = MatDestroy(&B);CHKERRQ(ierr);
3551 
3552   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3553   bnew->free_a       = PETSC_TRUE;
3554   bnew->free_ij      = PETSC_TRUE;
3555 
3556   /* condense columns of maij->B */
3557   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3558   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3559   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3560   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3561   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3562   PetscFunctionReturn(0);
3563 }
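/*
   A minimal sketch of calling the routine above, assuming each process builds a hypothetical
   2 x 2 diagonal block A and a 2 x 1 off-diagonal block B whose single column is the global column
   recorded in garray (values left to the caller); A becomes part of C and B is destroyed by the call.

      Mat      A,B,C;
      PetscInt garray[1];  // global column index of B's only column (assumed to be filled by the caller)

      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,2,2,NULL,&A);CHKERRQ(ierr);
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,1,1,NULL,&B);CHKERRQ(ierr);
      // ... set values in A and B and assemble both ...
      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);CHKERRQ(ierr);
*/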
3564 
3565 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3566 
3567 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3568 {
3569   PetscErrorCode ierr;
3570   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3571   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3572   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3573   Mat            M,Msub,B=a->B;
3574   MatScalar      *aa;
3575   Mat_SeqAIJ     *aij;
3576   PetscInt       *garray = a->garray,*colsub,Ncols;
3577   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3578   IS             iscol_sub,iscmap;
3579   const PetscInt *is_idx,*cmap;
3580   PetscBool      allcolumns=PETSC_FALSE;
3581   MPI_Comm       comm;
3582 
3583   PetscFunctionBegin;
3584   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3585 
3586   if (call == MAT_REUSE_MATRIX) {
3587     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3588     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3589     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3590 
3591     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3592     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3593 
3594     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3595     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3596 
3597     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3598 
3599   } else { /* call == MAT_INITIAL_MATRIX */
3600     PetscBool flg;
3601 
3602     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3603     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3604 
3605     /* (1) iscol -> nonscalable iscol_local */
3606     /* Check for special case: each processor gets entire matrix columns */
3607     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3608     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3609     if (allcolumns) {
3610       iscol_sub = iscol_local;
3611       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3612       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3613 
3614     } else {
3615       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3616       PetscInt *idx,*cmap1,k;
3617       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3618       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3619       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3620       count = 0;
3621       k     = 0;
3622       for (i=0; i<Ncols; i++) {
3623         j = is_idx[i];
3624         if (j >= cstart && j < cend) {
3625           /* diagonal part of mat */
3626           idx[count]     = j;
3627           cmap1[count++] = i; /* column index in submat */
3628         } else if (Bn) {
3629           /* off-diagonal part of mat */
3630           if (j == garray[k]) {
3631             idx[count]     = j;
3632             cmap1[count++] = i;  /* column index in submat */
3633           } else if (j > garray[k]) {
3634             while (j > garray[k] && k < Bn-1) k++;
3635             if (j == garray[k]) {
3636               idx[count]     = j;
3637               cmap1[count++] = i; /* column index in submat */
3638             }
3639           }
3640         }
3641       }
3642       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3643 
3644       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3645       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3646       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3647 
3648       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3649     }
3650 
3651     /* (3) Create sequential Msub */
3652     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3653   }
3654 
3655   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3656   aij  = (Mat_SeqAIJ*)(Msub)->data;
3657   ii   = aij->i;
3658   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3659 
3660   /*
3661       m - number of local rows
3662       Ncols - number of columns (same on all processors)
3663       rstart - first row in new global matrix generated
3664   */
3665   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3666 
3667   if (call == MAT_INITIAL_MATRIX) {
3668     /* (4) Create parallel newmat */
3669     PetscMPIInt    rank,size;
3670     PetscInt       csize;
3671 
3672     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3673     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3674 
3675     /*
3676         Determine the number of non-zeros in the diagonal and off-diagonal
3677         portions of the matrix in order to do correct preallocation
3678     */
3679 
3680     /* first get start and end of "diagonal" columns */
3681     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3682     if (csize == PETSC_DECIDE) {
3683       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3684       if (mglobal == Ncols) { /* square matrix */
3685         nlocal = m;
3686       } else {
3687         nlocal = Ncols/size + ((Ncols % size) > rank);
3688       }
3689     } else {
3690       nlocal = csize;
3691     }
3692     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3693     rstart = rend - nlocal;
3694     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3695 
3696     /* next, compute all the lengths */
3697     jj    = aij->j;
3698     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3699     olens = dlens + m;
3700     for (i=0; i<m; i++) {
3701       jend = ii[i+1] - ii[i];
3702       olen = 0;
3703       dlen = 0;
3704       for (j=0; j<jend; j++) {
3705         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3706         else dlen++;
3707         jj++;
3708       }
3709       olens[i] = olen;
3710       dlens[i] = dlen;
3711     }
3712 
3713     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3714     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3715 
3716     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3717     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3718     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3719     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3720     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3721     ierr = PetscFree(dlens);CHKERRQ(ierr);
3722 
3723   } else { /* call == MAT_REUSE_MATRIX */
3724     M    = *newmat;
3725     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3726     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3727     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3728     /*
3729          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3730        rather than the slower MatSetValues().
3731     */
3732     M->was_assembled = PETSC_TRUE;
3733     M->assembled     = PETSC_FALSE;
3734   }
3735 
3736   /* (5) Set values of Msub to *newmat */
3737   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3738   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3739 
3740   jj   = aij->j;
3741   aa   = aij->a;
3742   for (i=0; i<m; i++) {
3743     row = rstart + i;
3744     nz  = ii[i+1] - ii[i];
3745     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3746     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3747     jj += nz; aa += nz;
3748   }
3749   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3750 
3751   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3753 
3754   ierr = PetscFree(colsub);CHKERRQ(ierr);
3755 
3756   /* save Msub, iscol_sub and iscmap used in processor for next request */
3757   if (call ==  MAT_INITIAL_MATRIX) {
3758     *newmat = M;
3759     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3760     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3761 
3762     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3763     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3764 
3765     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3766     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3767 
3768     if (iscol_local) {
3769       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3770       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3771     }
3772   }
3773   PetscFunctionReturn(0);
3774 }
3775 
3776 /*
3777     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3778   and then, by concatenating the local matrices, the end result.
3779   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3780 
3781   Note: This requires a sequential iscol with all indices.
3782 */
3783 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3784 {
3785   PetscErrorCode ierr;
3786   PetscMPIInt    rank,size;
3787   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3788   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3789   Mat            M,Mreuse;
3790   MatScalar      *aa,*vwork;
3791   MPI_Comm       comm;
3792   Mat_SeqAIJ     *aij;
3793   PetscBool      colflag,allcolumns=PETSC_FALSE;
3794 
3795   PetscFunctionBegin;
3796   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3797   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3798   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3799 
3800   /* Check for special case: each processor gets entire matrix columns */
3801   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3802   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3803   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3804 
3805   if (call ==  MAT_REUSE_MATRIX) {
3806     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3807     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3808     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3809   } else {
3810     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3811   }
3812 
3813   /*
3814       m - number of local rows
3815       n - number of columns (same on all processors)
3816       rstart - first row in new global matrix generated
3817   */
3818   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3819   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3820   if (call == MAT_INITIAL_MATRIX) {
3821     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3822     ii  = aij->i;
3823     jj  = aij->j;
3824 
3825     /*
3826         Determine the number of non-zeros in the diagonal and off-diagonal
3827         portions of the matrix in order to do correct preallocation
3828     */
3829 
3830     /* first get start and end of "diagonal" columns */
3831     if (csize == PETSC_DECIDE) {
3832       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3833       if (mglobal == n) { /* square matrix */
3834         nlocal = m;
3835       } else {
3836         nlocal = n/size + ((n % size) > rank);
3837       }
3838     } else {
3839       nlocal = csize;
3840     }
3841     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3842     rstart = rend - nlocal;
3843     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3844 
3845     /* next, compute all the lengths */
3846     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3847     olens = dlens + m;
3848     for (i=0; i<m; i++) {
3849       jend = ii[i+1] - ii[i];
3850       olen = 0;
3851       dlen = 0;
3852       for (j=0; j<jend; j++) {
3853         if (*jj < rstart || *jj >= rend) olen++;
3854         else dlen++;
3855         jj++;
3856       }
3857       olens[i] = olen;
3858       dlens[i] = dlen;
3859     }
3860     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3861     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3862     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3863     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3864     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3865     ierr = PetscFree(dlens);CHKERRQ(ierr);
3866   } else {
3867     PetscInt ml,nl;
3868 
3869     M    = *newmat;
3870     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3871     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3872     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3873     /*
3874          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3875        rather than the slower MatSetValues().
3876     */
3877     M->was_assembled = PETSC_TRUE;
3878     M->assembled     = PETSC_FALSE;
3879   }
3880   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3881   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3882   ii   = aij->i;
3883   jj   = aij->j;
3884   aa   = aij->a;
3885   for (i=0; i<m; i++) {
3886     row   = rstart + i;
3887     nz    = ii[i+1] - ii[i];
3888     cwork = jj;     jj += nz;
3889     vwork = aa;     aa += nz;
3890     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3891   }
3892 
3893   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3894   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3895   *newmat = M;
3896 
3897   /* save submatrix used in processor for next request */
3898   if (call ==  MAT_INITIAL_MATRIX) {
3899     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3900     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3901   }
3902   PetscFunctionReturn(0);
3903 }
3904 
3905 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3906 {
3907   PetscInt       m,cstart, cend,j,nnz,i,d;
3908   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3909   const PetscInt *JJ;
3910   PetscErrorCode ierr;
3911   PetscBool      nooffprocentries;
3912 
3913   PetscFunctionBegin;
3914   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3915 
3916   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3917   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3918   m      = B->rmap->n;
3919   cstart = B->cmap->rstart;
3920   cend   = B->cmap->rend;
3921   rstart = B->rmap->rstart;
3922 
3923   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3924 
3925 #if defined(PETSC_USE_DEBUG)
3926   for (i=0; i<m; i++) {
3927     nnz = Ii[i+1]- Ii[i];
3928     JJ  = J + Ii[i];
3929     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3930     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3931     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3932   }
3933 #endif
3934 
3935   for (i=0; i<m; i++) {
3936     nnz     = Ii[i+1]- Ii[i];
3937     JJ      = J + Ii[i];
3938     nnz_max = PetscMax(nnz_max,nnz);
3939     d       = 0;
3940     for (j=0; j<nnz; j++) {
3941       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3942     }
3943     d_nnz[i] = d;
3944     o_nnz[i] = nnz - d;
3945   }
3946   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3947   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3948 
3949   for (i=0; i<m; i++) {
3950     ii   = i + rstart;
3951     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3952   }
3953   nooffprocentries    = B->nooffprocentries;
3954   B->nooffprocentries = PETSC_TRUE;
3955   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3956   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3957   B->nooffprocentries = nooffprocentries;
3958 
3959   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3960   PetscFunctionReturn(0);
3961 }
3962 
3963 /*@
3964    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3965    (the default parallel PETSc format).
3966 
3967    Collective
3968 
3969    Input Parameters:
3970 +  B - the matrix
3971 .  i - the indices into j for the start of each local row (starts with zero)
3972 .  j - the column indices for each local row (starts with zero)
3973 -  v - optional values in the matrix
3974 
3975    Level: developer
3976 
3977    Notes:
3978        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3979      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3980      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3981 
3982        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3983 
3984        The format used for the sparse matrix input is equivalent to a
3985     row-major ordering, i.e., for the following matrix, the input data expected is
3986     as shown below:
3987 
3988 $        1 0 0
3989 $        2 0 3     P0
3990 $       -------
3991 $        4 5 6     P1
3992 $
3993 $     Process0 [P0]: rows_owned=[0,1]
3994 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3995 $        j =  {0,0,2}  [size = 3]
3996 $        v =  {1,2,3}  [size = 3]
3997 $
3998 $     Process1 [P1]: rows_owned=[2]
3999 $        i =  {0,3}    [size = nrow+1  = 1+1]
4000 $        j =  {0,1,2}  [size = 3]
4001 $        v =  {4,5,6}  [size = 3]
4002 
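       A minimal calling sequence for the example above might look as follows (a sketch;
     nlocalrows and nlocalcols stand for the local sizes chosen by the caller, and i, j, v
     are the per-process arrays shown above):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,nlocalrows,nlocalcols,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve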
4003 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4004           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4005 @*/
4006 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4007 {
4008   PetscErrorCode ierr;
4009 
4010   PetscFunctionBegin;
4011   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4012   PetscFunctionReturn(0);
4013 }
4014 
4015 /*@C
4016    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4017    (the default parallel PETSc format).  For good matrix assembly performance
4018    the user should preallocate the matrix storage by setting the parameters
4019    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4020    performance can be increased by more than a factor of 50.
4021 
4022    Collective
4023 
4024    Input Parameters:
4025 +  B - the matrix
4026 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4027            (same value is used for all local rows)
4028 .  d_nnz - array containing the number of nonzeros in the various rows of the
4029            DIAGONAL portion of the local submatrix (possibly different for each row)
4030            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4031            The size of this array is equal to the number of local rows, i.e 'm'.
4032            For matrices that will be factored, you must leave room for (and set)
4033            the diagonal entry even if it is zero.
4034 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4035            submatrix (same value is used for all local rows).
4036 -  o_nnz - array containing the number of nonzeros in the various rows of the
4037            OFF-DIAGONAL portion of the local submatrix (possibly different for
4038            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4039            structure. The size of this array is equal to the number
4040            of local rows, i.e 'm'.
4041 
4042    If the *_nnz parameter is given then the *_nz parameter is ignored
4043 
4044    The AIJ format (also called the Yale sparse matrix format or
4045    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4046    storage.  The stored row and column indices begin with zero.
4047    See Users-Manual: ch_mat for details.
4048 
4049    The parallel matrix is partitioned such that the first m0 rows belong to
4050    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4051    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4052 
4053    The DIAGONAL portion of the local submatrix of a processor can be defined
4054    as the submatrix which is obtained by extracting the part corresponding to
4055    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4056    first row that belongs to the processor, r2 is the last row belonging to
4057    this processor, and c1-c2 is the range of indices of the local part of a
4058    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4059    common case of a square matrix, the row and column ranges are the same and
4060    the DIAGONAL part is also square. The remaining portion of the local
4061    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4062 
4063    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4064 
4065    You can call MatGetInfo() to get information on how effective the preallocation was;
4066    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4067    You can also run with the option -info and look for messages with the string
4068    malloc in them to see if additional memory allocation was needed.
4069 
4070    Example usage:
4071 
4072    Consider the following 8x8 matrix with 34 non-zero values, that is
4073    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4074    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4075    as follows:
4076 
4077 .vb
4078             1  2  0  |  0  3  0  |  0  4
4079     Proc0   0  5  6  |  7  0  0  |  8  0
4080             9  0 10  | 11  0  0  | 12  0
4081     -------------------------------------
4082            13  0 14  | 15 16 17  |  0  0
4083     Proc1   0 18  0  | 19 20 21  |  0  0
4084             0  0  0  | 22 23  0  | 24  0
4085     -------------------------------------
4086     Proc2  25 26 27  |  0  0 28  | 29  0
4087            30  0  0  | 31 32 33  |  0 34
4088 .ve
4089 
4090    This can be represented as a collection of submatrices as:
4091 
4092 .vb
4093       A B C
4094       D E F
4095       G H I
4096 .ve
4097 
4098    Where the submatrices A,B,C are owned by proc0, D,E,F are
4099    owned by proc1, G,H,I are owned by proc2.
4100 
4101    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4102    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4103    The 'M','N' parameters are 8,8, and have the same values on all procs.
4104 
4105    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4106    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4107    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4108    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4109    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4110    matrix, and [DF] as another SeqAIJ matrix.
4111 
4112    When d_nz, o_nz parameters are specified, d_nz storage elements are
4113    allocated for every row of the local diagonal submatrix, and o_nz
4114    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4115    One way to choose d_nz and o_nz is to use the max nonzeros per local
4116    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4117    In this case, the values of d_nz,o_nz are:
4118 .vb
4119      proc0 : dnz = 2, o_nz = 2
4120      proc1 : dnz = 3, o_nz = 2
4121      proc2 : dnz = 1, o_nz = 4
4122 .ve
4123    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4124    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4125    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4126    34 values.
4127 
4128    When d_nnz, o_nnz parameters are specified, the storage is specified
4129    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4130    In the above case the values for d_nnz,o_nnz are:
4131 .vb
4132      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4133      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4134      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4135 .ve
4136    Here the space allocated is sum of all the above values i.e 34, and
4137    hence pre-allocation is perfect.
4138 
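   As a concrete sketch, proc0 of the example above could set up its share of the 8x8 matrix
   as follows (the names A and comm are placeholders; d_nnz and o_nnz hold the per-row counts
   listed above for proc0):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve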
4139    Level: intermediate
4140 
4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4142           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4143 @*/
4144 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4145 {
4146   PetscErrorCode ierr;
4147 
4148   PetscFunctionBegin;
4149   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4150   PetscValidType(B,1);
4151   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4152   PetscFunctionReturn(0);
4153 }
4154 
4155 /*@
4156      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4157          in standard CSR format.
4158 
4159    Collective
4160 
4161    Input Parameters:
4162 +  comm - MPI communicator
4163 .  m - number of local rows (Cannot be PETSC_DECIDE)
4164 .  n - This value should be the same as the local size used in creating the
4165        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4166        calculated if N is given). For square matrices n is almost always m.
4167 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4168 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4169 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4170 .   j - column indices
4171 -   a - matrix values
4172 
4173    Output Parameter:
4174 .   mat - the matrix
4175 
4176    Level: intermediate
4177 
4178    Notes:
4179        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4180      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4181      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4182 
4183        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4184 
4185        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4186 
4187        The format used for the sparse matrix input is equivalent to a
4188     row-major ordering, i.e., for the following matrix, the input data expected is
4189     as shown below:
4190 
4191 $        1 0 0
4192 $        2 0 3     P0
4193 $       -------
4194 $        4 5 6     P1
4195 $
4196 $     Process0 [P0]: rows_owned=[0,1]
4197 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4198 $        j =  {0,0,2}  [size = 3]
4199 $        v =  {1,2,3}  [size = 3]
4200 $
4201 $     Process1 [P1]: rows_owned=[2]
4202 $        i =  {0,3}    [size = nrow+1  = 1+1]
4203 $        j =  {0,1,2}  [size = 3]
4204 $        v =  {4,5,6}  [size = 3]
4205 
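   For the two-process example above, rank 0 could create its share of the 3x3 matrix with a
   call like the following (a sketch; rank 1 would pass its own i, j, v arrays and m = 1):

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     Mat         A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve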
4206 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4207           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4208 @*/
4209 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4210 {
4211   PetscErrorCode ierr;
4212 
4213   PetscFunctionBegin;
4214   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4215   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4216   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4217   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4218   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4219   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4220   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4221   PetscFunctionReturn(0);
4222 }
4223 
4224 /*@
4225      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4226          in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used previously.
4227 
4228    Collective
4229 
4230    Input Parameters:
4231 +  mat - the matrix
4232 .  m - number of local rows (Cannot be PETSC_DECIDE)
4233 .  n - This value should be the same as the local size used in creating the
4234        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4235        calculated if N is given). For square matrices n is almost always m.
4236 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4237 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4238 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4239 .  J - column indices
4240 -  v - matrix values
4241 
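   Notes:
   A typical use is to refresh the numerical values of a matrix originally built with
   MatCreateMPIAIJWithArrays(); a sketch, where newv holds the updated values laid out exactly
   like the original v array:

.vb
     MatUpdateMPIAIJWithArrays(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,newv);
.ve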
4242    Level: intermediate
4243 
4244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4245           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4246 @*/
4247 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4248 {
4249   PetscErrorCode ierr;
4250   PetscInt       cstart,nnz,i,j;
4251   PetscInt       *ld;
4252   PetscBool      nooffprocentries;
4253   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4254   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4255   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4256   const PetscInt *Adi = Ad->i;
4257   PetscInt       ldi,Iii,md;
4258 
4259   PetscFunctionBegin;
4260   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4261   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4262   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4263   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4264 
4265   cstart = mat->cmap->rstart;
4266   if (!Aij->ld) {
4267     /* count number of entries below block diagonal */
4268     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4269     Aij->ld = ld;
4270     for (i=0; i<m; i++) {
4271       nnz  = Ii[i+1]- Ii[i];
4272       j     = 0;
4273       while  (j < nnz && J[j] < cstart) {j++;}
4274       J    += nnz;
4275       ld[i] = j;
4276     }
4277   } else {
4278     ld = Aij->ld;
4279   }
4280 
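  /* Split each row of v: the ld[i] leading entries (columns < cstart) go to the off-diagonal part B,
     the next md entries go to the diagonal part A, and the remaining entries go back to B */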
4281   for (i=0; i<m; i++) {
4282     nnz  = Ii[i+1]- Ii[i];
4283     Iii  = Ii[i];
4284     ldi  = ld[i];
4285     md   = Adi[i+1]-Adi[i];
4286     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4287     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4288     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4289     ad  += md;
4290     ao  += nnz - md;
4291   }
4292   nooffprocentries      = mat->nooffprocentries;
4293   mat->nooffprocentries = PETSC_TRUE;
4294   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4295   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4296   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4297   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4298   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4299   mat->nooffprocentries = nooffprocentries;
4300   PetscFunctionReturn(0);
4301 }
4302 
4303 /*@C
4304    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4305    (the default parallel PETSc format).  For good matrix assembly performance
4306    the user should preallocate the matrix storage by setting the parameters
4307    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4308    performance can be increased by more than a factor of 50.
4309 
4310    Collective
4311 
4312    Input Parameters:
4313 +  comm - MPI communicator
4314 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4315            This value should be the same as the local size used in creating the
4316            y vector for the matrix-vector product y = Ax.
4317 .  n - This value should be the same as the local size used in creating the
4318        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4319        calculated if N is given). For square matrices n is almost always m.
4320 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4321 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4322 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4323            (same value is used for all local rows)
4324 .  d_nnz - array containing the number of nonzeros in the various rows of the
4325            DIAGONAL portion of the local submatrix (possibly different for each row)
4326            or NULL, if d_nz is used to specify the nonzero structure.
4327            The size of this array is equal to the number of local rows, i.e 'm'.
4328 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4329            submatrix (same value is used for all local rows).
4330 -  o_nnz - array containing the number of nonzeros in the various rows of the
4331            OFF-DIAGONAL portion of the local submatrix (possibly different for
4332            each row) or NULL, if o_nz is used to specify the nonzero
4333            structure. The size of this array is equal to the number
4334            of local rows, i.e 'm'.
4335 
4336    Output Parameter:
4337 .  A - the matrix
4338 
4339    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4340    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4341    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4342 
4343    Notes:
4344    If the *_nnz parameter is given then the *_nz parameter is ignored
4345 
4346    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4347    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4348    storage requirements for this matrix.
4349 
4350    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4351    processor then it must be used on all processors that share the object for
4352    that argument.
4353 
4354    The user MUST specify either the local or global matrix dimensions
4355    (possibly both).
4356 
4357    The parallel matrix is partitioned across processors such that the
4358    first m0 rows belong to process 0, the next m1 rows belong to
4359    process 1, the next m2 rows belong to process 2, etc., where
4360    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4361    values corresponding to an [m x N] submatrix.
4362 
4363    The columns are logically partitioned with the n0 columns belonging
4364    to the 0th partition, the next n1 columns belonging to the next
4365    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4366 
4367    The DIAGONAL portion of the local submatrix on any given processor
4368    is the submatrix corresponding to the rows and columns m,n
4369    corresponding to the given processor, i.e., the diagonal matrix on
4370    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4371    etc. The remaining portion of the local submatrix [m x (N-n)]
4372    constitutes the OFF-DIAGONAL portion. The example below better
4373    illustrates this concept.
4374 
4375    For a square global matrix we define each processor's diagonal portion
4376    to be its local rows and the corresponding columns (a square submatrix);
4377    each processor's off-diagonal portion encompasses the remainder of the
4378    local matrix (a rectangular submatrix).
4379 
4380    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4381 
4382    When calling this routine with a single process communicator, a matrix of
4383    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4384    type of communicator, use the construction mechanism
4385 .vb
4386      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4387 .ve
4388 
4389 $     MatCreate(...,&A);
4390 $     MatSetType(A,MATMPIAIJ);
4391 $     MatSetSizes(A, m,n,M,N);
4392 $     MatMPIAIJSetPreallocation(A,...);
4393 
4394    By default, this format uses inodes (identical nodes) when possible.
4395    We search for consecutive rows with the same nonzero structure, thereby
4396    reusing matrix information to achieve increased efficiency.
4397 
4398    Options Database Keys:
4399 +  -mat_no_inode  - Do not use inodes
4400 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4401 
4402 
4403 
4404    Example usage:
4405 
4406    Consider the following 8x8 matrix with 34 non-zero values, that is
4407    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4408    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4409    as follows
4410 
4411 .vb
4412             1  2  0  |  0  3  0  |  0  4
4413     Proc0   0  5  6  |  7  0  0  |  8  0
4414             9  0 10  | 11  0  0  | 12  0
4415     -------------------------------------
4416            13  0 14  | 15 16 17  |  0  0
4417     Proc1   0 18  0  | 19 20 21  |  0  0
4418             0  0  0  | 22 23  0  | 24  0
4419     -------------------------------------
4420     Proc2  25 26 27  |  0  0 28  | 29  0
4421            30  0  0  | 31 32 33  |  0 34
4422 .ve
4423 
4424    This can be represented as a collection of submatrices as
4425 
4426 .vb
4427       A B C
4428       D E F
4429       G H I
4430 .ve
4431 
4432    Where the submatrices A,B,C are owned by proc0, D,E,F are
4433    owned by proc1, G,H,I are owned by proc2.
4434 
4435    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4436    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4437    The 'M','N' parameters are 8,8, and have the same values on all procs.
4438 
4439    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4440    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4441    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4442    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4443    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4444    matrix, and [DF] as another SeqAIJ matrix.
4445 
4446    When d_nz, o_nz parameters are specified, d_nz storage elements are
4447    allocated for every row of the local diagonal submatrix, and o_nz
4448    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4449    One way to choose d_nz and o_nz is to use the max nonzeros per local
4450    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4451    In this case, the values of d_nz,o_nz are
4452 .vb
4453      proc0 : dnz = 2, o_nz = 2
4454      proc1 : dnz = 3, o_nz = 2
4455      proc2 : dnz = 1, o_nz = 4
4456 .ve
4457    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4458    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4459    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4460    34 values.
4461 
4462    When d_nnz, o_nnz parameters are specified, the storage is specified
4463    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4464    In the above case the values for d_nnz,o_nnz are
4465 .vb
4466      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4467      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4468      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4469 .ve
4470    Here the space allocated is sum of all the above values i.e 34, and
4471    hence pre-allocation is perfect.
4472 
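   As a concrete sketch, proc1 of the example above could create its share of the 8x8 matrix
   with a single call (A and comm are placeholders; d_nnz and o_nnz are the proc1 values listed
   above):

.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve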
4473    Level: intermediate
4474 
4475 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4476           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4477 @*/
4478 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4479 {
4480   PetscErrorCode ierr;
4481   PetscMPIInt    size;
4482 
4483   PetscFunctionBegin;
4484   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4485   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4486   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4487   if (size > 1) {
4488     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4489     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4490   } else {
4491     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4492     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4493   }
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4498 {
4499   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4500   PetscBool      flg;
4501   PetscErrorCode ierr;
4502 
4503   PetscFunctionBegin;
4504   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4505   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4506   if (Ad)     *Ad     = a->A;
4507   if (Ao)     *Ao     = a->B;
4508   if (colmap) *colmap = a->garray;
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4513 {
4514   PetscErrorCode ierr;
4515   PetscInt       m,N,i,rstart,nnz,Ii;
4516   PetscInt       *indx;
4517   PetscScalar    *values;
4518 
4519   PetscFunctionBegin;
4520   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4521   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4522     PetscInt       *dnz,*onz,sum,bs,cbs;
4523 
4524     if (n == PETSC_DECIDE) {
4525       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4526     }
4527     /* Check sum(n) = N */
4528     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4529     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4530 
4531     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4532     rstart -= m;
4533 
4534     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4535     for (i=0; i<m; i++) {
4536       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4537       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4538       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4539     }
4540 
4541     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4542     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4543     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4544     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4545     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4546     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4547     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4548     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4549   }
4550 
4551   /* numeric phase */
4552   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4553   for (i=0; i<m; i++) {
4554     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4555     Ii   = i + rstart;
4556     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4557     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4558   }
4559   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4560   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4561   PetscFunctionReturn(0);
4562 }
4563 
4564 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4565 {
4566   PetscErrorCode    ierr;
4567   PetscMPIInt       rank;
4568   PetscInt          m,N,i,rstart,nnz;
4569   size_t            len;
4570   const PetscInt    *indx;
4571   PetscViewer       out;
4572   char              *name;
4573   Mat               B;
4574   const PetscScalar *values;
4575 
4576   PetscFunctionBegin;
4577   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4578   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4579   /* Should this be the type of the diagonal block of A? */
4580   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4581   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4582   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4583   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4584   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4585   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4586   for (i=0; i<m; i++) {
4587     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4588     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4589     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4590   }
4591   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4592   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4593 
4594   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4595   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4596   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4597   sprintf(name,"%s.%d",outfile,rank);
4598   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4599   ierr = PetscFree(name);CHKERRQ(ierr);
4600   ierr = MatView(B,out);CHKERRQ(ierr);
4601   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4602   ierr = MatDestroy(&B);CHKERRQ(ierr);
4603   PetscFunctionReturn(0);
4604 }
4605 
4606 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4607 {
4608   PetscErrorCode      ierr;
4609   Mat_Merge_SeqsToMPI *merge;
4610   PetscContainer      container;
4611 
4612   PetscFunctionBegin;
4613   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4614   if (container) {
4615     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4616     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4617     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4618     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4619     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4620     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4621     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4622     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4623     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4624     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4625     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4626     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4627     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4628     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4629     ierr = PetscFree(merge);CHKERRQ(ierr);
4630     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4631   }
4632   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4633   PetscFunctionReturn(0);
4634 }
4635 
4636 #include <../src/mat/utils/freespace.h>
4637 #include <petscbt.h>
4638 
4639 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4640 {
4641   PetscErrorCode      ierr;
4642   MPI_Comm            comm;
4643   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4644   PetscMPIInt         size,rank,taga,*len_s;
4645   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4646   PetscInt            proc,m;
4647   PetscInt            **buf_ri,**buf_rj;
4648   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4649   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4650   MPI_Request         *s_waits,*r_waits;
4651   MPI_Status          *status;
4652   MatScalar           *aa=a->a;
4653   MatScalar           **abuf_r,*ba_i;
4654   Mat_Merge_SeqsToMPI *merge;
4655   PetscContainer      container;
4656 
4657   PetscFunctionBegin;
4658   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4659   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4660 
4661   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4662   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4663 
4664   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4665   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4666 
4667   bi     = merge->bi;
4668   bj     = merge->bj;
4669   buf_ri = merge->buf_ri;
4670   buf_rj = merge->buf_rj;
4671 
4672   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4673   owners = merge->rowmap->range;
4674   len_s  = merge->len_s;
4675 
4676   /* send and recv matrix values */
4677   /*-----------------------------*/
4678   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4679   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4680 
4681   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4682   for (proc=0,k=0; proc<size; proc++) {
4683     if (!len_s[proc]) continue;
4684     i    = owners[proc];
4685     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4686     k++;
4687   }
4688 
4689   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4690   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4691   ierr = PetscFree(status);CHKERRQ(ierr);
4692 
4693   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4694   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4695 
4696   /* insert mat values of mpimat */
4697   /*----------------------------*/
4698   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4699   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4700 
4701   for (k=0; k<merge->nrecv; k++) {
4702     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4703     nrows       = *(buf_ri_k[k]);
4704     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4705     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4706   }
4707 
4708   /* set values of ba */
4709   m = merge->rowmap->n;
4710   for (i=0; i<m; i++) {
4711     arow = owners[rank] + i;
4712     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4713     bnzi = bi[i+1] - bi[i];
4714     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4715 
4716     /* add local non-zero vals of this proc's seqmat into ba */
4717     anzi   = ai[arow+1] - ai[arow];
4718     aj     = a->j + ai[arow];
4719     aa     = a->a + ai[arow];
4720     nextaj = 0;
4721     for (j=0; nextaj<anzi; j++) {
4722       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4723         ba_i[j] += aa[nextaj++];
4724       }
4725     }
4726 
4727     /* add received vals into ba */
4728     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4729       /* i-th row */
4730       if (i == *nextrow[k]) {
4731         anzi   = *(nextai[k]+1) - *nextai[k];
4732         aj     = buf_rj[k] + *(nextai[k]);
4733         aa     = abuf_r[k] + *(nextai[k]);
4734         nextaj = 0;
4735         for (j=0; nextaj<anzi; j++) {
4736           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4737             ba_i[j] += aa[nextaj++];
4738           }
4739         }
4740         nextrow[k]++; nextai[k]++;
4741       }
4742     }
4743     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4744   }
4745   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4746   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4747 
4748   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4749   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4750   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4751   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4752   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4753   PetscFunctionReturn(0);
4754 }
4755 
4756 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4757 {
4758   PetscErrorCode      ierr;
4759   Mat                 B_mpi;
4760   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4761   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4762   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4763   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4764   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4765   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4766   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4767   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4768   MPI_Status          *status;
4769   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4770   PetscBT             lnkbt;
4771   Mat_Merge_SeqsToMPI *merge;
4772   PetscContainer      container;
4773 
4774   PetscFunctionBegin;
4775   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4776 
4777   /* make sure it is a PETSc comm */
4778   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4779   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4780   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4781 
4782   ierr = PetscNew(&merge);CHKERRQ(ierr);
4783   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4784 
4785   /* determine row ownership */
4786   /*---------------------------------------------------------*/
4787   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4788   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4789   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4790   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4791   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4792   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4793   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4794 
4795   m      = merge->rowmap->n;
4796   owners = merge->rowmap->range;
4797 
4798   /* determine the number of messages to send, their lengths */
4799   /*---------------------------------------------------------*/
4800   len_s = merge->len_s;
4801 
4802   len          = 0; /* length of buf_si[] */
4803   merge->nsend = 0;
4804   for (proc=0; proc<size; proc++) {
4805     len_si[proc] = 0;
4806     if (proc == rank) {
4807       len_s[proc] = 0;
4808     } else {
4809       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4810       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4811     }
4812     if (len_s[proc]) {
4813       merge->nsend++;
4814       nrows = 0;
4815       for (i=owners[proc]; i<owners[proc+1]; i++) {
4816         if (ai[i+1] > ai[i]) nrows++;
4817       }
4818       len_si[proc] = 2*(nrows+1);
4819       len         += len_si[proc];
4820     }
4821   }
4822 
4823   /* determine the number and length of messages to receive for ij-structure */
4824   /*-------------------------------------------------------------------------*/
4825   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4826   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4827 
4828   /* post the Irecv of j-structure */
4829   /*-------------------------------*/
4830   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4831   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4832 
4833   /* post the Isend of j-structure */
4834   /*--------------------------------*/
4835   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4836 
4837   for (proc=0, k=0; proc<size; proc++) {
4838     if (!len_s[proc]) continue;
4839     i    = owners[proc];
4840     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4841     k++;
4842   }
4843 
4844   /* receives and sends of j-structure are complete */
4845   /*------------------------------------------------*/
4846   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4847   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4848 
4849   /* send and recv i-structure */
4850   /*---------------------------*/
4851   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4852   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4853 
4854   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4855   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4856   for (proc=0,k=0; proc<size; proc++) {
4857     if (!len_s[proc]) continue;
4858     /* form outgoing message for i-structure:
4859          buf_si[0]:                 nrows to be sent
4860                [1:nrows]:           row index (global)
4861                [nrows+1:2*nrows+1]: i-structure index
4862     */
4863     /*-------------------------------------------*/
4864     nrows       = len_si[proc]/2 - 1;
4865     buf_si_i    = buf_si + nrows+1;
4866     buf_si[0]   = nrows;
4867     buf_si_i[0] = 0;
4868     nrows       = 0;
4869     for (i=owners[proc]; i<owners[proc+1]; i++) {
4870       anzi = ai[i+1] - ai[i];
4871       if (anzi) {
4872         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4873         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4874         nrows++;
4875       }
4876     }
4877     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4878     k++;
4879     buf_si += len_si[proc];
4880   }
4881 
4882   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4883   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4884 
4885   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4886   for (i=0; i<merge->nrecv; i++) {
4887     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4888   }
4889 
4890   ierr = PetscFree(len_si);CHKERRQ(ierr);
4891   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4892   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4893   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4894   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4895   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4896   ierr = PetscFree(status);CHKERRQ(ierr);
4897 
4898   /* compute a local seq matrix in each processor */
4899   /*----------------------------------------------*/
4900   /* allocate bi array and free space for accumulating nonzero column info */
4901   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4902   bi[0] = 0;
4903 
4904   /* create and initialize a linked list */
4905   nlnk = N+1;
4906   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4907 
4908   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4909   len  = ai[owners[rank+1]] - ai[owners[rank]];
4910   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4911 
4912   current_space = free_space;
4913 
4914   /* determine symbolic info for each local row */
4915   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4916 
4917   for (k=0; k<merge->nrecv; k++) {
4918     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4919     nrows       = *buf_ri_k[k];
4920     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4921     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4922   }
4923 
4924   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4925   len  = 0;
4926   for (i=0; i<m; i++) {
4927     bnzi = 0;
4928     /* add local non-zero cols of this proc's seqmat into lnk */
4929     arow  = owners[rank] + i;
4930     anzi  = ai[arow+1] - ai[arow];
4931     aj    = a->j + ai[arow];
4932     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4933     bnzi += nlnk;
4934     /* add received col data into lnk */
4935     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4936       if (i == *nextrow[k]) { /* i-th row */
4937         anzi  = *(nextai[k]+1) - *nextai[k];
4938         aj    = buf_rj[k] + *nextai[k];
4939         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4940         bnzi += nlnk;
4941         nextrow[k]++; nextai[k]++;
4942       }
4943     }
4944     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4945 
4946     /* if free space is not available, make more free space */
4947     if (current_space->local_remaining<bnzi) {
4948       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4949       nspacedouble++;
4950     }
4951     /* copy data into free space, then initialize lnk */
4952     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4953     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4954 
4955     current_space->array           += bnzi;
4956     current_space->local_used      += bnzi;
4957     current_space->local_remaining -= bnzi;
4958 
4959     bi[i+1] = bi[i] + bnzi;
4960   }
4961 
4962   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4963 
4964   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4965   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4966   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4967 
4968   /* create symbolic parallel matrix B_mpi */
4969   /*---------------------------------------*/
4970   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4971   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4972   if (n==PETSC_DECIDE) {
4973     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4974   } else {
4975     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4976   }
4977   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4978   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4979   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4980   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4981   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4982 
4983   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4984   B_mpi->assembled    = PETSC_FALSE;
4985   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4986   merge->bi           = bi;
4987   merge->bj           = bj;
4988   merge->buf_ri       = buf_ri;
4989   merge->buf_rj       = buf_rj;
4990   merge->coi          = NULL;
4991   merge->coj          = NULL;
4992   merge->owners_co    = NULL;
4993 
4994   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4995 
4996   /* attach the supporting struct to B_mpi for reuse */
4997   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4998   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4999   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5000   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5001   *mpimat = B_mpi;
5002 
5003   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5004   PetscFunctionReturn(0);
5005 }
5006 
5007 /*@C
5008       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5009                  matrices from each processor
5010 
5011     Collective
5012 
5013    Input Parameters:
5014 +    comm - the communicator the parallel matrix will live on
5015 .    seqmat - the input sequential matrices
5016 .    m - number of local rows (or PETSC_DECIDE)
5017 .    n - number of local columns (or PETSC_DECIDE)
5018 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5019 
5020    Output Parameter:
5021 .    mpimat - the parallel matrix generated
5022 
5023     Level: advanced
5024 
5025    Notes:
     The dimensions of the sequential matrix on each process MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
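
   Example usage:
     A minimal sketch (not a complete program); Aseq is assumed to be an existing MATSEQAIJ matrix with the same
     dimensions on every rank, the parallel layout is left to PETSC_DECIDE, and error checking is omitted. The
     second call reuses the parallel matrix after only the numerical values of Aseq have changed.
.vb
     Mat Apar;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Apar);
.ve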
5029 @*/
5030 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5031 {
5032   PetscErrorCode ierr;
5033   PetscMPIInt    size;
5034 
5035   PetscFunctionBegin;
5036   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5037   if (size == 1) {
5038     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5039     if (scall == MAT_INITIAL_MATRIX) {
5040       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5041     } else {
5042       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5043     }
5044     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5045     PetscFunctionReturn(0);
5046   }
5047   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5048   if (scall == MAT_INITIAL_MATRIX) {
5049     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5050   }
5051   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5052   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5053   PetscFunctionReturn(0);
5054 }
5055 
5056 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
5060 
5061     Not Collective
5062 
5063    Input Parameters:
5064 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5066 
5067    Output Parameter:
5068 .    A_loc - the local sequential matrix generated
5069 
5070     Level: developer
5071 
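   Example usage:
     A minimal sketch (not a complete program); A is assumed to be an assembled MATMPIAIJ matrix and error
     checking is omitted. The second call refills the previously created local matrix after the values of A
     (but not its nonzero pattern) have changed.
.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
     MatDestroy(&Aloc);
.ve
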
5072 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5073 
5074 @*/
5075 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5076 {
5077   PetscErrorCode ierr;
5078   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5079   Mat_SeqAIJ     *mat,*a,*b;
5080   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5081   MatScalar      *aa,*ba,*cam;
5082   PetscScalar    *ca;
5083   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5084   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5085   PetscBool      match;
5086   MPI_Comm       comm;
5087   PetscMPIInt    size;
5088 
5089   PetscFunctionBegin;
5090   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5091   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5092   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5093   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5094   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5095 
5096   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5097   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5098   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5099   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5100   aa = a->a; ba = b->a;
5101   if (scall == MAT_INITIAL_MATRIX) {
5102     if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); /* balance the event begun above before the early return */
      PetscFunctionReturn(0);
5105     }
5106 
5107     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5108     ci[0] = 0;
5109     for (i=0; i<am; i++) {
5110       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5111     }
5112     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5113     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5114     k    = 0;
5115     for (i=0; i<am; i++) {
5116       ncols_o = bi[i+1] - bi[i];
5117       ncols_d = ai[i+1] - ai[i];
5118       /* off-diagonal portion of A */
5119       for (jo=0; jo<ncols_o; jo++) {
5120         col = cmap[*bj];
5121         if (col >= cstart) break;
5122         cj[k]   = col; bj++;
5123         ca[k++] = *ba++;
5124       }
5125       /* diagonal portion of A */
5126       for (j=0; j<ncols_d; j++) {
5127         cj[k]   = cstart + *aj++;
5128         ca[k++] = *aa++;
5129       }
5130       /* off-diagonal portion of A */
5131       for (j=jo; j<ncols_o; j++) {
5132         cj[k]   = cmap[*bj++];
5133         ca[k++] = *ba++;
5134       }
5135     }
5136     /* put together the new matrix */
5137     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5138     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5139     /* Since these are PETSc arrays, change flags to free them as necessary. */
5140     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5141     mat->free_a  = PETSC_TRUE;
5142     mat->free_ij = PETSC_TRUE;
5143     mat->nonew   = 0;
5144   } else if (scall == MAT_REUSE_MATRIX) {
5145     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5146     ci = mat->i; cj = mat->j; cam = mat->a;
5147     for (i=0; i<am; i++) {
5148       /* off-diagonal portion of A */
5149       ncols_o = bi[i+1] - bi[i];
5150       for (jo=0; jo<ncols_o; jo++) {
5151         col = cmap[*bj];
5152         if (col >= cstart) break;
5153         *cam++ = *ba++; bj++;
5154       }
5155       /* diagonal portion of A */
5156       ncols_d = ai[i+1] - ai[i];
5157       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5158       /* off-diagonal portion of A */
5159       for (j=jo; j<ncols_o; j++) {
5160         *cam++ = *ba++; bj++;
5161       }
5162     }
5163   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5164   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5165   PetscFunctionReturn(0);
5166 }
5167 
5168 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5170 
5171     Not Collective
5172 
5173    Input Parameters:
5174 +    A - the matrix
5175 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5176 -    row, col - index sets of rows and columns to extract (or NULL)
5177 
5178    Output Parameter:
5179 .    A_loc - the local sequential matrix generated
5180 
5181     Level: developer
5182 
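   Example usage:
     A minimal sketch (not a complete program); A is assumed to be an assembled MATMPIAIJ matrix and error
     checking is omitted. Passing NULL for the row and column index sets lets the routine select all local
     rows and the nonzero columns automatically.
.vb
     Mat Aloc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
     MatDestroy(&Aloc);
.ve
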
5183 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5184 
5185 @*/
5186 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5187 {
5188   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5189   PetscErrorCode ierr;
5190   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5191   IS             isrowa,iscola;
5192   Mat            *aloc;
5193   PetscBool      match;
5194 
5195   PetscFunctionBegin;
5196   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5197   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5198   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5199   if (!row) {
5200     start = A->rmap->rstart; end = A->rmap->rend;
5201     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5202   } else {
5203     isrowa = *row;
5204   }
5205   if (!col) {
5206     start = A->cmap->rstart;
5207     cmap  = a->garray;
5208     nzA   = a->A->cmap->n;
5209     nzB   = a->B->cmap->n;
5210     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5211     ncols = 0;
5212     for (i=0; i<nzB; i++) {
5213       if (cmap[i] < start) idx[ncols++] = cmap[i];
5214       else break;
5215     }
5216     imark = i;
5217     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5218     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5219     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5220   } else {
5221     iscola = *col;
5222   }
5223   if (scall != MAT_INITIAL_MATRIX) {
5224     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5225     aloc[0] = *A_loc;
5226   }
5227   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5228   if (!col) { /* attach global id of condensed columns */
5229     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5230   }
5231   *A_loc = aloc[0];
5232   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5233   if (!row) {
5234     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5235   }
5236   if (!col) {
5237     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5238   }
5239   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5240   PetscFunctionReturn(0);
5241 }
5242 
5243 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5245 
5246     Collective on Mat
5247 
5248    Input Parameters:
5249 +    A,B - the matrices in mpiaij format
5250 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5251 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5252 
   Output Parameters:
5254 +    rowb, colb - index sets of rows and columns of B to extract
5255 -    B_seq - the sequential matrix generated
5256 
5257     Level: developer
5258 
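   Example usage:
     A minimal sketch (not a complete program); A and B are assumed to be assembled MATMPIAIJ matrices with
     compatible layouts and error checking is omitted. On the first call the row and column index sets are
     created and returned so that they, together with B_seq, can be reused on later calls.
.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
     MatDestroy(&Bseq);
     ISDestroy(&rowb);
     ISDestroy(&colb);
.ve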
5259 @*/
5260 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5261 {
5262   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5263   PetscErrorCode ierr;
5264   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5265   IS             isrowb,iscolb;
5266   Mat            *bseq=NULL;
5267 
5268   PetscFunctionBegin;
5269   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5270     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5271   }
5272   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5273 
5274   if (scall == MAT_INITIAL_MATRIX) {
5275     start = A->cmap->rstart;
5276     cmap  = a->garray;
5277     nzA   = a->A->cmap->n;
5278     nzB   = a->B->cmap->n;
5279     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5280     ncols = 0;
5281     for (i=0; i<nzB; i++) {  /* row < local row index */
5282       if (cmap[i] < start) idx[ncols++] = cmap[i];
5283       else break;
5284     }
5285     imark = i;
5286     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5287     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5288     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5289     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5290   } else {
5291     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5292     isrowb  = *rowb; iscolb = *colb;
5293     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5294     bseq[0] = *B_seq;
5295   }
5296   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5297   *B_seq = bseq[0];
5298   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5299   if (!rowb) {
5300     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5301   } else {
5302     *rowb = isrowb;
5303   }
5304   if (!colb) {
5305     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5306   } else {
5307     *colb = iscolb;
5308   }
5309   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5310   PetscFunctionReturn(0);
5311 }
5312 
5313 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5315     of the OFF-DIAGONAL portion of local A
5316 
5317     Collective on Mat
5318 
5319    Input Parameters:
5320 +    A,B - the matrices in mpiaij format
5321 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5322 
   Output Parameters:
5324 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5325 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5326 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5327 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5328 
5329     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5331 
5332     Level: developer
5333 
5334 */
5335 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5336 {
5337   PetscErrorCode         ierr;
5338   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5339   Mat_SeqAIJ             *b_oth;
5340   VecScatter             ctx;
5341   MPI_Comm               comm;
5342   const PetscMPIInt      *rprocs,*sprocs;
5343   const PetscInt         *srow,*rstarts,*sstarts;
5344   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5345   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5346   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5347   MPI_Request            *rwaits = NULL,*swaits = NULL;
5348   MPI_Status             rstatus;
5349   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5350 
5351   PetscFunctionBegin;
5352   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5353   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5354 
5355   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5357   }
5358   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5359   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5360 
5361   if (size == 1) {
5362     startsj_s = NULL;
5363     bufa_ptr  = NULL;
5364     *B_oth    = NULL;
5365     PetscFunctionReturn(0);
5366   }
5367 
5368   ctx = a->Mvctx;
5369   tag = ((PetscObject)ctx)->tag;
5370 
  if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5372   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5373   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5374   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5375   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5376   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5377   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5378 
5379   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5380   if (scall == MAT_INITIAL_MATRIX) {
5381     /* i-array */
5382     /*---------*/
5383     /*  post receives */
5384     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5385     for (i=0; i<nrecvs; i++) {
5386       rowlen = rvalues + rstarts[i]*rbs;
5387       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5388       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5389     }
5390 
5391     /* pack the outgoing message */
5392     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5393 
5394     sstartsj[0] = 0;
5395     rstartsj[0] = 0;
5396     len         = 0; /* total length of j or a array to be sent */
5397     if (nsends) {
5398       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5399       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5400     }
5401     for (i=0; i<nsends; i++) {
5402       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5403       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5404       for (j=0; j<nrows; j++) {
5405         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5406         for (l=0; l<sbs; l++) {
5407           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5408 
5409           rowlen[j*sbs+l] = ncols;
5410 
5411           len += ncols;
5412           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5413         }
5414         k++;
5415       }
5416       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5417 
5418       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5419     }
5420     /* recvs and sends of i-array are completed */
5421     i = nrecvs;
5422     while (i--) {
5423       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5424     }
5425     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5426     ierr = PetscFree(svalues);CHKERRQ(ierr);
5427 
5428     /* allocate buffers for sending j and a arrays */
5429     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5430     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5431 
5432     /* create i-array of B_oth */
5433     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5434 
5435     b_othi[0] = 0;
5436     len       = 0; /* total length of j or a array to be received */
5437     k         = 0;
5438     for (i=0; i<nrecvs; i++) {
5439       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5440       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5441       for (j=0; j<nrows; j++) {
5442         b_othi[k+1] = b_othi[k] + rowlen[j];
5443         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5444         k++;
5445       }
5446       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5447     }
5448     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5449 
    /* allocate space for the j and a arrays of B_oth */
5451     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5452     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5453 
5454     /* j-array */
5455     /*---------*/
5456     /*  post receives of j-array */
5457     for (i=0; i<nrecvs; i++) {
5458       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5459       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5460     }
5461 
5462     /* pack the outgoing message j-array */
5463     if (nsends) k = sstarts[0];
5464     for (i=0; i<nsends; i++) {
5465       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5466       bufJ  = bufj+sstartsj[i];
5467       for (j=0; j<nrows; j++) {
5468         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5469         for (ll=0; ll<sbs; ll++) {
5470           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5471           for (l=0; l<ncols; l++) {
5472             *bufJ++ = cols[l];
5473           }
5474           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5475         }
5476       }
5477       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5478     }
5479 
5480     /* recvs and sends of j-array are completed */
5481     i = nrecvs;
5482     while (i--) {
5483       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5484     }
5485     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5486   } else if (scall == MAT_REUSE_MATRIX) {
5487     sstartsj = *startsj_s;
5488     rstartsj = *startsj_r;
5489     bufa     = *bufa_ptr;
5490     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5491     b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5493 
5494   /* a-array */
5495   /*---------*/
5496   /*  post receives of a-array */
5497   for (i=0; i<nrecvs; i++) {
5498     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5499     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5500   }
5501 
5502   /* pack the outgoing message a-array */
5503   if (nsends) k = sstarts[0];
5504   for (i=0; i<nsends; i++) {
5505     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5506     bufA  = bufa+sstartsj[i];
5507     for (j=0; j<nrows; j++) {
5508       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5509       for (ll=0; ll<sbs; ll++) {
5510         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5511         for (l=0; l<ncols; l++) {
5512           *bufA++ = vals[l];
5513         }
5514         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5515       }
5516     }
5517     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5518   }
5519   /* recvs and sends of a-array are completed */
5520   i = nrecvs;
5521   while (i--) {
5522     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5523   }
5524   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5525   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5526 
5527   if (scall == MAT_INITIAL_MATRIX) {
5528     /* put together the new matrix */
5529     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5530 
5531     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5532     /* Since these are PETSc arrays, change flags to free them as necessary. */
5533     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5534     b_oth->free_a  = PETSC_TRUE;
5535     b_oth->free_ij = PETSC_TRUE;
5536     b_oth->nonew   = 0;
5537 
5538     ierr = PetscFree(bufj);CHKERRQ(ierr);
5539     if (!startsj_s || !bufa_ptr) {
5540       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5541       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5542     } else {
5543       *startsj_s = sstartsj;
5544       *startsj_r = rstartsj;
5545       *bufa_ptr  = bufa;
5546     }
5547   }
5548 
5549   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5550   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5551   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5552   PetscFunctionReturn(0);
5553 }
5554 
5555 /*@C
5556   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5557 
5558   Not Collective
5559 
  Input Parameter:
5561 . A - The matrix in mpiaij format
5562 
  Output Parameters:
5564 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5565 . colmap - A map from global column index to local index into lvec
5566 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5567 
5568   Level: developer
5569 
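   Example usage:
     A minimal sketch (not a complete program); A is assumed to be an assembled MATMPIAIJ matrix and error
     checking is omitted. The returned objects are owned by A and must not be destroyed by the caller.
.vb
     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve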
5570 @*/
5571 #if defined(PETSC_USE_CTABLE)
5572 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5573 #else
5574 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5575 #endif
5576 {
5577   Mat_MPIAIJ *a;
5578 
5579   PetscFunctionBegin;
5580   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5581   PetscValidPointer(lvec, 2);
5582   PetscValidPointer(colmap, 3);
5583   PetscValidPointer(multScatter, 4);
5584   a = (Mat_MPIAIJ*) A->data;
5585   if (lvec) *lvec = a->lvec;
5586   if (colmap) *colmap = a->colmap;
5587   if (multScatter) *multScatter = a->Mvctx;
5588   PetscFunctionReturn(0);
5589 }
5590 
5591 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5592 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5593 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5594 #if defined(PETSC_HAVE_MKL_SPARSE)
5595 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5596 #endif
5597 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5598 #if defined(PETSC_HAVE_ELEMENTAL)
5599 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5600 #endif
5601 #if defined(PETSC_HAVE_HYPRE)
5602 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5603 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5604 #endif
5605 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5606 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5607 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5608 
5609 /*
    Computes C = (B'*A')' = A*B, since computing A*B directly with a parallel dense A is untenable
5611 
5612                n                       p                          p
5613         (              )       (              )         (                  )
5614       m (      A       )  *  n (       B      )   =   m (         C        )
5615         (              )       (              )         (                  )
5616 
5617 */
5618 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5619 {
5620   PetscErrorCode ierr;
5621   Mat            At,Bt,Ct;
5622 
5623   PetscFunctionBegin;
5624   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5625   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5626   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5627   ierr = MatDestroy(&At);CHKERRQ(ierr);
5628   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5629   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5630   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5631   PetscFunctionReturn(0);
5632 }
5633 
5634 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5635 {
5636   PetscErrorCode ierr;
5637   PetscInt       m=A->rmap->n,n=B->cmap->n;
5638   Mat            Cmat;
5639 
5640   PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5642   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5643   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5644   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5645   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5646   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5647   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5648   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5649 
5650   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5651 
5652   *C = Cmat;
5653   PetscFunctionReturn(0);
5654 }
5655 
5656 /* ----------------------------------------------------------------*/
5657 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5658 {
5659   PetscErrorCode ierr;
5660 
5661   PetscFunctionBegin;
5662   if (scall == MAT_INITIAL_MATRIX) {
5663     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5664     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5665     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5666   }
5667   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5668   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5669   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5670   PetscFunctionReturn(0);
5671 }
5672 
5673 /*MC
5674    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5675 
5676    Options Database Keys:
5677 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5678 
5679   Level: beginner
5680 
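  Example usage:
     A minimal sketch (not a complete program); the global size 100 and the preallocation counts 5 and 2 are
     placeholders and error checking is omitted. Because MatSetFromOptions() is called, -mat_type can still
     override the matrix type at run time.
.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATMPIAIJ);
     MatSetFromOptions(A);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
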
5681 .seealso: MatCreateAIJ()
5682 M*/
5683 
5684 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5685 {
5686   Mat_MPIAIJ     *b;
5687   PetscErrorCode ierr;
5688   PetscMPIInt    size;
5689 
5690   PetscFunctionBegin;
5691   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5692 
5693   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5694   B->data       = (void*)b;
5695   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5696   B->assembled  = PETSC_FALSE;
5697   B->insertmode = NOT_SET_VALUES;
5698   b->size       = size;
5699 
5700   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5701 
5702   /* build cache for off array entries formed */
5703   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5704 
5705   b->donotstash  = PETSC_FALSE;
5706   b->colmap      = 0;
5707   b->garray      = 0;
5708   b->roworiented = PETSC_TRUE;
5709 
5710   /* stuff used for matrix vector multiply */
5711   b->lvec  = NULL;
5712   b->Mvctx = NULL;
5713 
5714   /* stuff for MatGetRow() */
5715   b->rowindices   = 0;
5716   b->rowvalues    = 0;
5717   b->getrowactive = PETSC_FALSE;
5718 
5719   /* flexible pointer used in CUSP/CUSPARSE classes */
5720   b->spptr = NULL;
5721 
5722   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5723   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5724   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5725   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5726   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5727   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5728   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5729   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5730   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5731   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5732 #if defined(PETSC_HAVE_MKL_SPARSE)
5733   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5734 #endif
5735   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5736   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5737 #if defined(PETSC_HAVE_ELEMENTAL)
5738   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5739 #endif
5740 #if defined(PETSC_HAVE_HYPRE)
5741   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5742 #endif
5743   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5744   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5745   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5746   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5747   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5748 #if defined(PETSC_HAVE_HYPRE)
5749   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5750 #endif
5751   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5752   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5753   PetscFunctionReturn(0);
5754 }
5755 
5756 /*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5758          and "off-diagonal" part of the matrix in CSR format.
5759 
5760    Collective
5761 
5762    Input Parameters:
5763 +  comm - MPI communicator
5764 .  m - number of local rows (Cannot be PETSC_DECIDE)
5765 .  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
5768 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5769 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5770 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5771 .   j - column indices
5772 .   a - matrix values
5773 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5774 .   oj - column indices
5775 -   oa - matrix values
5776 
5777    Output Parameter:
5778 .   mat - the matrix
5779 
5780    Level: advanced
5781 
5782    Notes:
5783        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5784        must free the arrays once the matrix has been destroyed and not before.
5785 
5786        The i and j indices are 0 based
5787 
5788        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5789 
5790        This sets local rows and cannot be used to set off-processor values.
5791 
5792        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5793        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5794        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5795        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5796        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5797        communication if it is known that only local entries will be set.
5798 
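   Example usage:
     A minimal sketch (not a complete program); every rank contributes one local row containing a single 1.0 in
     its diagonal block, so the result is a parallel identity matrix. The off-diagonal block is empty and oj/oa
     are one-entry placeholder arrays that are never read. The arrays are not copied, so here they must outlive
     the matrix. Error checking is omitted.
.vb
     PetscInt    i[]  = {0,1};
     PetscInt    j[]  = {0};
     PetscScalar a[]  = {1.0};
     PetscInt    oi[] = {0,0};
     PetscInt    oj[] = {0};
     PetscScalar oa[] = {0.0};
     Mat         A;
     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
.ve
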
5799 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5800           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5801 @*/
5802 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5803 {
5804   PetscErrorCode ierr;
5805   Mat_MPIAIJ     *maij;
5806 
5807   PetscFunctionBegin;
5808   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5809   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5810   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5811   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5812   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5813   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5814   maij = (Mat_MPIAIJ*) (*mat)->data;
5815 
5816   (*mat)->preallocated = PETSC_TRUE;
5817 
5818   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5819   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5820 
5821   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5822   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5823 
5824   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5825   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5826   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5827   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5828 
5829   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5830   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5831   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5832   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5833   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5834   PetscFunctionReturn(0);
5835 }
5836 
5837 /*
5838     Special version for direct calls from Fortran
5839 */
5840 #include <petsc/private/fortranimpl.h>
5841 
5842 /* Change these macros so can be used in void function */
5843 #undef CHKERRQ
5844 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5845 #undef SETERRQ2
5846 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5847 #undef SETERRQ3
5848 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5849 #undef SETERRQ
5850 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5851 
5852 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5853 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5854 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5855 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5856 #else
5857 #endif
5858 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5859 {
5860   Mat            mat  = *mmat;
5861   PetscInt       m    = *mm, n = *mn;
5862   InsertMode     addv = *maddv;
5863   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5864   PetscScalar    value;
5865   PetscErrorCode ierr;
5866 
5867   MatCheckPreallocated(mat,1);
5868   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5869 
5870 #if defined(PETSC_USE_DEBUG)
5871   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5872 #endif
5873   {
5874     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5875     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5876     PetscBool roworiented = aij->roworiented;
5877 
5878     /* Some Variables required in the macro */
5879     Mat        A                 = aij->A;
5880     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5881     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5882     MatScalar  *aa               = a->a;
5883     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5884     Mat        B                 = aij->B;
5885     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5886     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5887     MatScalar  *ba               = b->a;
5888 
5889     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5890     PetscInt  nonew = a->nonew;
5891     MatScalar *ap1,*ap2;
5892 
5893     PetscFunctionBegin;
5894     for (i=0; i<m; i++) {
5895       if (im[i] < 0) continue;
5896 #if defined(PETSC_USE_DEBUG)
5897       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5898 #endif
5899       if (im[i] >= rstart && im[i] < rend) {
5900         row      = im[i] - rstart;
5901         lastcol1 = -1;
5902         rp1      = aj + ai[row];
5903         ap1      = aa + ai[row];
5904         rmax1    = aimax[row];
5905         nrow1    = ailen[row];
5906         low1     = 0;
5907         high1    = nrow1;
5908         lastcol2 = -1;
5909         rp2      = bj + bi[row];
5910         ap2      = ba + bi[row];
5911         rmax2    = bimax[row];
5912         nrow2    = bilen[row];
5913         low2     = 0;
5914         high2    = nrow2;
5915 
5916         for (j=0; j<n; j++) {
5917           if (roworiented) value = v[i*n+j];
5918           else value = v[i+j*m];
5919           if (in[j] >= cstart && in[j] < cend) {
5920             col = in[j] - cstart;
5921             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5922             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5923           } else if (in[j] < 0) continue;
5924 #if defined(PETSC_USE_DEBUG)
5925           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5926           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5927 #endif
5928           else {
5929             if (mat->was_assembled) {
5930               if (!aij->colmap) {
5931                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5932               }
5933 #if defined(PETSC_USE_CTABLE)
5934               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5935               col--;
5936 #else
5937               col = aij->colmap[in[j]] - 1;
5938 #endif
5939               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5940               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5941                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5942                 col  =  in[j];
5943                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5944                 B     = aij->B;
5945                 b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;   /* refresh ba before using it: MatDisAssemble_MPIAIJ() replaced B's arrays */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
5955               }
5956             } else col = in[j];
5957             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5958           }
5959         }
5960       } else if (!aij->donotstash) {
5961         if (roworiented) {
5962           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5963         } else {
5964           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5965         }
5966       }
5967     }
5968   }
5969   PetscFunctionReturnVoid();
5970 }
5971