xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d7cc930e14e615e9907267aaa472dd0ccceeab82)
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
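
/*
   A minimal usage sketch (hypothetical sizes, not taken from this file): calling both
   preallocation routines keeps the code correct on any communicator size, since only
   the one matching the actual type takes effect.

     Mat      A;
     PetscInt n = 100;                                                // illustrative global size

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);        // used on 1 process
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); // used on >1 process
*/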

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
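
/*
   A typical way to select this type at run time (a sketch; assumes the application
   calls MatSetFromOptions() on the matrix):

     ./app -mat_type aijcrl
*/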

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
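
/*
   A hedged usage sketch of MatDistribute_MPIAIJ() (assumes gmat is a square MATSEQAIJ
   valid on rank 0 of comm, and m is this process's local row count):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     // ... later, refresh only the numerical values from process 0:
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/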

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it it is not scalable (each
  process stores an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
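
/*
   Worked example of the colmap convention (illustrative numbers): if garray = {3,7,12},
   the array variant sets colmap[3] = 1, colmap[7] = 2, colmap[12] = 3, with all other
   entries 0, so a stored value of 0 means "global column not present on this process"
   and callers subtract 1 to recover the local index. The CTABLE variant stores the same
   local-index-plus-one values, keyed by global column + 1 to avoid the key 0.
*/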

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
  { \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value;   \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0);   \
        } \
        else                    ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
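
/*
   Worked example of the insertion path in the two macros above (illustrative numbers):
   inserting col = 5 into a row whose current column indices are rp = {2,4,7} first
   narrows the range by binary search, scans to _i = 2 (the first index with
   rp[_i] > 5), sets N = 2 (the old last index), shifts rp[2..2] = {7} up by one with
   PetscArraymove(), and stores 5 and its value at position 2, leaving rp = {2,4,5,7}
   with nrow and nz incremented.
*/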

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
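
/*
   Input ordering assumed by MatSetValuesRow_MPIAIJ() above (a sketch with illustrative
   column positions): for a row whose global nonzeros sit at columns {1, 4, 10, 11, 20},
   with this process owning the diagonal block columns 10..11, the array v must hold
   the values in ascending global column order: the l = 2 entries left of the diagonal
   block, then the diagonal-block entries, then the remaining off-diagonal entries,
   i.e. v = {a(1), a(4), a(10), a(11), a(20)}.
*/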

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa                  = a->a;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba                  = b->a;
  /* The variable below is only used in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases
   * because we cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
  Mat            A           = aij->A; /* diagonal part of the matrix */
  Mat            B           = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt       *ailen      = a->ilen,*aj = a->j;
  PetscInt       *bilen      = b->ilen,*bj = b->j;
  PetscInt       am          = aij->A->rmap->n,j;
  PetscInt       diag_so_far = 0,dnz;
  PetscInt       offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  Mat            A      = aij->A; /* diagonal part of the matrix */
  Mat            B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ     *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;
  PetscInt       *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar    *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}
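
/*
   A minimal sketch of the CSR input expected by the two CopyFromCSRFormat routines
   above (illustrative data; assumes 2 local rows with cstart = 0 and cend = 2):

     PetscInt    mat_i[] = {0,2,4};            // row pointers, sorted
     PetscInt    mat_j[] = {0,5, 1,5};         // columns sorted within each row
     PetscScalar mat_a[] = {1.0,2.0, 3.0,4.0}; // values, matching mat_j

   Columns 0 and 1 land in the diagonal block A; column 5 lands in the off-diagonal
   block B with its global index kept as-is.
*/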

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns are ignored */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
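
/*
   A usage sketch (hypothetical indices; assumes a square matrix): MatGetValues() on an
   MPIAIJ matrix may only request rows owned by the calling process; off-process columns
   are looked up through the colmap and read back 0.0 when the entry is not stored
   locally.

     PetscInt    row;        // any locally owned row, e.g. rstart from MatGetOwnershipRange()
     PetscScalar val;
     ierr = MatGetOwnershipRange(mat,&row,NULL);CHKERRQ(ierr);
     ierr = MatGetValues(mat,1,&row,1,&row,&val);CHKERRQ(ierr); // the diagonal entry
*/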

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
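
/*
   The stash path above is what makes off-process insertion work; a minimal sketch
   (hypothetical global indices grow/gcol owned by another process):

     ierr = MatSetValue(mat,grow,gcol,1.0,ADD_VALUES);CHKERRQ(ierr); // stashed locally
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  // scatters the stash
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    // inserts received values
*/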

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ*)A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix the right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
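
/*
   A usage sketch (hypothetical row indices): zero a set of global rows, place diag on
   the diagonal, and fix the right-hand side b consistently with the prescribed
   solution values held in x:

     PetscInt rows[] = {0, 7};                        // illustrative global rows
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/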

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change the matrix nonzero state if the pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
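
/*
   What MatMult_MPIAIJ() above computes, written out (the standard MPIAIJ splitting):
   with A_d the local diagonal block, B_o the off-diagonal block, and x_ghost = lvec
   the scattered ghost values,

     y = A_d * x_local + B_o * x_ghost

   The scatter filling x_ghost is started before the local product so that the
   communication can overlap the A_d multiply.
*/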

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the non-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add the partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the non-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add the partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1309 
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1315   const PetscInt    *garray = aij->garray;
1316   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1317   PetscInt          *rowlens;
1318   PetscInt          *colidxs;
1319   PetscScalar       *matvals;
1320   PetscErrorCode    ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1324 
1325   M  = mat->rmap->N;
1326   N  = mat->cmap->N;
1327   m  = mat->rmap->n;
1328   rs = mat->rmap->rstart;
1329   cs = mat->cmap->rstart;
1330   nz = A->nz + B->nz;
1331 
1332   /* write matrix header */
1333   header[0] = MAT_FILE_CLASSID;
1334   header[1] = M; header[2] = N; header[3] = nz;
1335   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1337 
1338   /* fill in and store row lengths  */
1339   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1340   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1341   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1342   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1343 
1344   /* fill in and store column indices */
1345   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
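  /* For each local row, emit the global column indices in ascending order: first the
     off-diagonal entries whose global column (through garray) lies before the diagonal
     block, then the diagonal-block entries shifted by cs, then the remaining
     off-diagonal entries */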
1346   for (cnt=0, i=0; i<m; i++) {
1347     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1348       if (garray[B->j[jb]] > cs) break;
1349       colidxs[cnt++] = garray[B->j[jb]];
1350     }
1351     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1352       colidxs[cnt++] = A->j[ja] + cs;
1353     for (; jb<B->i[i+1]; jb++)
1354       colidxs[cnt++] = garray[B->j[jb]];
1355   }
1356   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1357   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1358   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1359 
1360   /* fill in and store nonzero values */
1361   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1362   for (cnt=0, i=0; i<m; i++) {
1363     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1364       if (garray[B->j[jb]] > cs) break;
1365       matvals[cnt++] = B->a[jb];
1366     }
1367     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1368       matvals[cnt++] = A->a[ja];
1369     for (; jb<B->i[i+1]; jb++)
1370       matvals[cnt++] = B->a[jb];
1371   }
1372   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1373   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1374   ierr = PetscFree(matvals);CHKERRQ(ierr);
1375 
1376   /* write block size option to the viewer's .info file */
1377   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
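
/*
  The binary format produced above is the standard PETSc sparse-matrix file:
  a header [MAT_FILE_CLASSID, M, N, global nz], then the M row lengths, the nz
  global column indices, and the nz values.  A sketch of reading it back with
  MatLoad() (the file name is hypothetical):

    Mat         A;
    PetscViewer v;
    ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&v);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatLoad(A,v);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&v);CHKERRQ(ierr);
*/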
1380 
1381 #include <petscdraw.h>
1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1383 {
1384   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1385   PetscErrorCode    ierr;
1386   PetscMPIInt       rank = aij->rank,size = aij->size;
1387   PetscBool         isdraw,iascii,isbinary;
1388   PetscViewer       sviewer;
1389   PetscViewerFormat format;
1390 
1391   PetscFunctionBegin;
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1393   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1394   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1395   if (iascii) {
1396     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1397     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1398       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1399       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1400       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1401       for (i=0; i<(PetscInt)size; i++) {
1402         nmax = PetscMax(nmax,nz[i]);
1403         nmin = PetscMin(nmin,nz[i]);
1404         navg += nz[i];
1405       }
1406       ierr = PetscFree(nz);CHKERRQ(ierr);
1407       navg = navg/size;
1408       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1409       PetscFunctionReturn(0);
1410     }
1411     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1412     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1413       MatInfo   info;
1414       PetscBool inodes;
1415 
1416       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1417       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1418       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1420       if (!inodes) {
1421         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1422                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1423       } else {
1424         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1425                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1426       }
1427       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1429       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1434       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1435       PetscFunctionReturn(0);
1436     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1437       PetscInt inodecount,inodelimit,*inodes;
1438       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1439       if (inodes) {
1440         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1441       } else {
1442         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1443       }
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1446       PetscFunctionReturn(0);
1447     }
1448   } else if (isbinary) {
1449     if (size == 1) {
1450       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1451       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1454     }
1455     PetscFunctionReturn(0);
1456   } else if (iascii && size == 1) {
1457     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1458     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1459     PetscFunctionReturn(0);
1460   } else if (isdraw) {
1461     PetscDraw draw;
1462     PetscBool isnull;
1463     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1464     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1465     if (isnull) PetscFunctionReturn(0);
1466   }
1467 
1468   { /* assemble the entire matrix onto first processor */
1469     Mat A = NULL, Av;
1470     IS  isrow,iscol;
1471 
1472     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1473     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1474     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1475     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1476 /*  The commented code uses MatCreateSubMatrices instead */
1477 /*
1478     Mat *AA, A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1484     if (!rank) {
1485        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1486        A    = AA[0];
1487        Av   = AA[0];
1488     }
1489     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1490 */
1491     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1492     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1493     /*
1494        Every process has to participate in the drawing since the graphics waits are
1495        synchronized across all processes that share the PetscDraw object
1496     */
1497     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1498     if (!rank) {
1499       if (((PetscObject)mat)->name) {
1500         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1501       }
1502       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&A);CHKERRQ(ierr);
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1512 {
1513   PetscErrorCode ierr;
1514   PetscBool      iascii,isdraw,issocket,isbinary;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1521   if (iascii || isdraw || isbinary || issocket) {
1522     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1528 {
1529   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1530   PetscErrorCode ierr;
1531   Vec            bb1 = NULL;
1532   PetscBool      hasop;
1533 
1534   PetscFunctionBegin;
1535   if (flag == SOR_APPLY_UPPER) {
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537     PetscFunctionReturn(0);
1538   }
1539 
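  /* a work vector bb1 is needed below unless a single sweep with a zero initial guess
     (and without the Eisenstat trick) was requested */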
1540   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1541     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1542   }
1543 
1544   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1545     if (flag & SOR_ZERO_INITIAL_GUESS) {
1546       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1547       its--;
1548     }
1549 
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1578     if (flag & SOR_ZERO_INITIAL_GUESS) {
1579       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1580       its--;
1581     }
1582     while (its--) {
1583       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585 
1586       /* update rhs: bb1 = bb - B*x */
1587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1589 
1590       /* local sweep */
1591       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1592     }
1593   } else if (flag & SOR_EISENSTAT) {
1594     Vec xx1;
1595 
1596     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1598 
1599     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     if (!mat->diag) {
1602       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1604     }
1605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1606     if (hasop) {
1607       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1608     } else {
1609       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1610     }
1611     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1612 
1613     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1614 
1615     /* local sweep */
1616     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1617     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1618     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1619   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1620 
1621   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1622 
1623   matin->factorerrortype = mat->A->factorerrortype;
1624   PetscFunctionReturn(0);
1625 }
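
/*
  Usage sketch for the SOR kernel above: one block Jacobi iteration with a local
  symmetric SOR sweep (omega = 1.0, no shift, zero initial guess):

    ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

  This path is normally reached through PCSOR, for example with the options
  -pc_type sor -pc_sor_local_symmetric.
*/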
1626 
1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1628 {
1629   Mat            aA,aB,Aperm;
1630   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1631   PetscScalar    *aa,*ba;
1632   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1633   PetscSF        rowsf,sf;
1634   IS             parcolp = NULL;
1635   PetscBool      done;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1640   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1643 
1644   /* Invert row permutation to find out where my rows should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1649   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
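  /* After the reduce, rdest[i] holds the destination (new global) row of local row i:
     each leaf deposited its own position in the permuted ordering (rstart+i) at the
     root that owns the old row rwant[i] it requested */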
1651 
1652   /* Invert column permutation to find out where my columns should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1656   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1657   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1660 
1661   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1664 
1665   /* Find out where my gcols should go */
1666   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1667   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     PetscInt    row = rdest[i];
1680     PetscMPIInt rowner;
1681     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1682     for (j=ai[i]; j<ai[i+1]; j++) {
1683       PetscInt    col = cdest[aj[j]];
1684       PetscMPIInt cowner;
1685       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1686       if (rowner == cowner) dnnz[i]++;
1687       else onnz[i]++;
1688     }
1689     for (j=bi[i]; j<bi[i+1]; j++) {
1690       PetscInt    col = gcdest[bj[j]];
1691       PetscMPIInt cowner;
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m (the size of the work arrays), so insert in batches of at most m entries */
1711       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
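
/*
  Usage sketch for MatGetGhosts(): the ghost entries are the global column indices
  of the off-diagonal block, in the order used by the local work vector lvec:

    PetscInt       nghosts;
    const PetscInt *ghosts;
    ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/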
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscLogDouble isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
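
/*
  Usage sketch for MatGetInfo() with a global reduction over all processes:

    MatInfo info;
    ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/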
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818   case MAT_SORTED_FULL:
1819     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1820     break;
1821   case MAT_IGNORE_OFF_PROC_ENTRIES:
1822     a->donotstash = flg;
1823     break;
1824   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1825   case MAT_SPD:
1826   case MAT_SYMMETRIC:
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828   case MAT_HERMITIAN:
1829   case MAT_SYMMETRY_ETERNAL:
1830     break;
1831   case MAT_SUBMAT_SINGLEIS:
1832     A->submat_singleis = flg;
1833     break;
1834   case MAT_STRUCTURE_ONLY:
1835     /* The option is handled directly by MatSetOption() */
1836     break;
1837   default:
1838     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1844 {
1845   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1846   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1847   PetscErrorCode ierr;
1848   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1849   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1850   PetscInt       *cmap,*idx_p;
1851 
1852   PetscFunctionBegin;
1853   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1854   mat->getrowactive = PETSC_TRUE;
1855 
1856   if (!mat->rowvalues && (idx || v)) {
1857     /*
1858         allocate enough space to hold information from the longest row.
1859     */
1860     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1861     PetscInt   max = 1,tmp;
1862     for (i=0; i<matin->rmap->n; i++) {
1863       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1864       if (max < tmp) max = tmp;
1865     }
1866     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1867   }
1868 
1869   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1870   lrow = row - rstart;
1871 
1872   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1873   if (!v)   {pvA = NULL; pvB = NULL;}
1874   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1875   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1876   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1877   nztot = nzA + nzB;
1878 
1879   cmap = mat->garray;
1880   if (v  || idx) {
1881     if (nztot) {
1882       /* Sort by increasing column numbers, assuming A and B already sorted */
1883       PetscInt imark = -1;
1884       if (v) {
1885         *v = v_p = mat->rowvalues;
1886         for (i=0; i<nzB; i++) {
1887           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1888           else break;
1889         }
1890         imark = i;
1891         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1892         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1893       }
1894       if (idx) {
1895         *idx = idx_p = mat->rowindices;
1896         if (imark > -1) {
1897           for (i=0; i<imark; i++) {
1898             idx_p[i] = cmap[cworkB[i]];
1899           }
1900         } else {
1901           for (i=0; i<nzB; i++) {
1902             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1903             else break;
1904           }
1905           imark = i;
1906         }
1907         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1908         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1909       }
1910     } else {
1911       if (idx) *idx = NULL;
1912       if (v)   *v   = NULL;
1913     }
1914   }
1915   *nz  = nztot;
1916   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1917   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1922 {
1923   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1924 
1925   PetscFunctionBegin;
1926   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1927   aij->getrowactive = PETSC_FALSE;
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1932 {
1933   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1934   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1935   PetscErrorCode ierr;
1936   PetscInt       i,j,cstart = mat->cmap->rstart;
1937   PetscReal      sum = 0.0;
1938   MatScalar      *v;
1939 
1940   PetscFunctionBegin;
1941   if (aij->size == 1) {
1942     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1943   } else {
1944     if (type == NORM_FROBENIUS) {
1945       v = amat->a;
1946       for (i=0; i<amat->nz; i++) {
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948       }
1949       v = bmat->a;
1950       for (i=0; i<bmat->nz; i++) {
1951         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1952       }
1953       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1954       *norm = PetscSqrtReal(*norm);
1955       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1960       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1961       *norm = 0.0;
1962       v     = amat->a; jj = amat->j;
1963       for (j=0; j<amat->nz; j++) {
1964         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1965       }
1966       v = bmat->a; jj = bmat->j;
1967       for (j=0; j<bmat->nz; j++) {
1968         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1969       }
1970       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1971       for (j=0; j<mat->cmap->N; j++) {
1972         if (tmp2[j] > *norm) *norm = tmp2[j];
1973       }
1974       ierr = PetscFree(tmp);CHKERRQ(ierr);
1975       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1976       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v   = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1992       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1993     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1994   }
1995   PetscFunctionReturn(0);
1996 }
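
/*
  The norms computed above, with MPI reductions supplying the global sums/maxima:
    NORM_FROBENIUS:  ||A||_F   = sqrt(sum_ij |a_ij|^2)
    NORM_1:          ||A||_1   = max_j sum_i |a_ij|    (largest column sum)
    NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|    (largest row sum)
*/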
1997 
1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1999 {
2000   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2001   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2002   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2003   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2004   PetscErrorCode  ierr;
2005   Mat             B,A_diag,*B_diag;
2006   const MatScalar *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2060      very quickly (that is, without using MatSetValues()) because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
2064   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
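
/*
  Usage sketch for the transpose above: create A^T once, then refill it after A's
  numerical values change (the nonzero pattern must stay the same for the reuse case):

    Mat At;
    ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
    ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
*/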
2086 
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2106   }
2107   /* scale the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* Because of the column compression in the off-process part of the matrix a->B,
2157        the number of columns in a->B and b->B may differ, hence we cannot call
2158        MatCopy() directly on the two parts. If need be, a copy more efficient than
2159        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2160        and then copying the submatrices */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
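
/*
  A worked example of the merge count above: if row i of X has global columns
  {0,3,5} and row i of Y has global columns {1,3,7}, the merged pattern is
  {0,1,3,5,7}, so nnz[i] = 5 (the shared column 3 is counted only once).
*/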
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/MatAssemblyEnd(), so the matrix on the GPU
2237        will be updated */
2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2239     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2240       Y->offloadmask = PETSC_OFFLOAD_CPU;
2241     }
2242 #endif
2243   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2244     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2245   } else {
2246     Mat      B;
2247     PetscInt *nnz_d,*nnz_o;
2248     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2249     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2250     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2251     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2252     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2253     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2254     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2255     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2256     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2257     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2258     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2259     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2260     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2261   }
2262   PetscFunctionReturn(0);
2263 }
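
/*
  Usage sketch: Y <- Y + 2.0*X.  With SAME_NONZERO_PATTERN the fast BLAS axpy path
  above is taken; with DIFFERENT_NONZERO_PATTERN a new matrix with the merged
  pattern is preallocated and assembled:

    ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/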
2264 
2265 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2266 
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2283 {
2284   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2285   PetscErrorCode ierr;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2289   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2305 {
2306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308   PetscInt       i,*idxb = NULL;
2309   PetscScalar    *va,*vb;
2310   Vec            vtmp;
2311 
2312   PetscFunctionBegin;
2313   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2314   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2315   if (idx) {
2316     for (i=0; i<A->rmap->n; i++) {
2317       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2318     }
2319   }
2320 
2321   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2322   if (idx) {
2323     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2324   }
2325   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2326   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2327 
2328   for (i=0; i<A->rmap->n; i++) {
2329     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2330       va[i] = vb[i];
2331       if (idx) idx[i] = a->garray[idxb[i]];
2332     }
2333   }
2334 
2335   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2336   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2337   ierr = PetscFree(idxb);CHKERRQ(ierr);
2338   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2343 {
2344   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2345   PetscErrorCode ierr;
2346   PetscInt       i,*idxb = NULL;
2347   PetscScalar    *va,*vb;
2348   Vec            vtmp;
2349 
2350   PetscFunctionBegin;
2351   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2352   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2353   if (idx) {
2354     for (i=0; i<A->rmap->n; i++) { /* loop over local rows, as in MatGetRowMaxAbs_MPIAIJ() */
2355       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2356     }
2357   }
2358 
2359   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2360   if (idx) {
2361     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2362   }
2363   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2364   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2365 
2366   for (i=0; i<A->rmap->n; i++) {
2367     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2368       va[i] = vb[i];
2369       if (idx) idx[i] = a->garray[idxb[i]];
2370     }
2371   }
2372 
2373   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2374   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2375   ierr = PetscFree(idxb);CHKERRQ(ierr);
2376   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2381 {
2382   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2383   PetscInt       n      = A->rmap->n;
2384   PetscInt       cstart = A->cmap->rstart;
2385   PetscInt       *cmap  = mat->garray;
2386   PetscInt       *diagIdx, *offdiagIdx;
2387   Vec            diagV, offdiagV;
2388   PetscScalar    *a, *diagA, *offdiagA;
2389   PetscInt       r;
2390   PetscErrorCode ierr;
2391 
2392   PetscFunctionBegin;
2393   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2394   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* VecCreateSeq() requires a single-process communicator */
2395   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2396   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2397   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2398   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2399   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2400   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2401   for (r = 0; r < n; ++r) {
2402     if (PetscRealPart(diagA[r]) <= PetscRealPart(offdiagA[r])) { /* compare (real parts of) the values themselves, not their magnitudes */
2403       a[r]   = diagA[r];
2404       idx[r] = cstart + diagIdx[r];
2405     } else {
2406       a[r]   = offdiagA[r];
2407       idx[r] = cmap[offdiagIdx[r]];
2408     }
2409   }
2410   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2411   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2412   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2413   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2414   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2415   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2420 {
2421   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2422   PetscInt       n      = A->rmap->n;
2423   PetscInt       cstart = A->cmap->rstart;
2424   PetscInt       *cmap  = mat->garray;
2425   PetscInt       *diagIdx, *offdiagIdx;
2426   Vec            diagV, offdiagV;
2427   PetscScalar    *a, *diagA, *offdiagA;
2428   PetscInt       r;
2429   PetscErrorCode ierr;
2430 
2431   PetscFunctionBegin;
2432   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2433   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2434   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2435   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2436   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2437   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2438   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2439   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2440   for (r = 0; r < n; ++r) {
2441     if (PetscRealPart(diagA[r]) >= PetscRealPart(offdiagA[r])) { /* compare (real parts of) the values themselves, not their magnitudes */
2442       a[r]   = diagA[r];
2443       idx[r] = cstart + diagIdx[r];
2444     } else {
2445       a[r]   = offdiagA[r];
2446       idx[r] = cmap[offdiagIdx[r]];
2447     }
2448   }
2449   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2450   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2451   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2452   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2453   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2454   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2459 {
2460   PetscErrorCode ierr;
2461   Mat            *dummy;
2462 
2463   PetscFunctionBegin;
2464   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2465   *newmat = *dummy;
2466   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2471 {
2472   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2473   PetscErrorCode ierr;
2474 
2475   PetscFunctionBegin;
2476   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2477   A->factorerrortype = a->A->factorerrortype;
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2482 {
2483   PetscErrorCode ierr;
2484   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2485 
2486   PetscFunctionBegin;
2487   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2488   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2489   if (x->assembled) {
2490     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2491   } else {
2492     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2493   }
2494   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2495   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2500 {
2501   PetscFunctionBegin;
2502   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2503   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 /*@
2508    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2509 
2510    Collective on Mat
2511 
2512    Input Parameters:
2513 +    A - the matrix
2514 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2515 
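   Options Database Keys:
.    -mat_increase_overlap_scalable - use a scalable algorithm to compute the overlap
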
2516  Level: advanced
2517 
2518 @*/
2519 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2520 {
2521   PetscErrorCode       ierr;
2522 
2523   PetscFunctionBegin;
2524   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2529 {
2530   PetscErrorCode       ierr;
2531   PetscBool            sc = PETSC_FALSE,flg;
2532 
2533   PetscFunctionBegin;
2534   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2535   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2536   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2537   if (flg) {
2538     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2539   }
2540   ierr = PetscOptionsTail();CHKERRQ(ierr);
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2545 {
2546   PetscErrorCode ierr;
2547   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2548   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2549 
2550   PetscFunctionBegin;
2551   if (!Y->preallocated) {
2552     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2553   } else if (!aij->nz) {
2554     PetscInt nonew = aij->nonew;
2555     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2556     aij->nonew = nonew;
2557   }
2558   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2563 {
2564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2569   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2570   if (d) {
2571     PetscInt rstart;
2572     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2573     *d += rstart;
2574 
2575   }
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2580 {
2581   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2582   PetscErrorCode ierr;
2583 
2584   PetscFunctionBegin;
2585   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 /* -------------------------------------------------------------------*/
2590 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2591                                        MatGetRow_MPIAIJ,
2592                                        MatRestoreRow_MPIAIJ,
2593                                        MatMult_MPIAIJ,
2594                                 /* 4*/ MatMultAdd_MPIAIJ,
2595                                        MatMultTranspose_MPIAIJ,
2596                                        MatMultTransposeAdd_MPIAIJ,
2597                                        NULL,
2598                                        NULL,
2599                                        NULL,
2600                                 /*10*/ NULL,
2601                                        NULL,
2602                                        NULL,
2603                                        MatSOR_MPIAIJ,
2604                                        MatTranspose_MPIAIJ,
2605                                 /*15*/ MatGetInfo_MPIAIJ,
2606                                        MatEqual_MPIAIJ,
2607                                        MatGetDiagonal_MPIAIJ,
2608                                        MatDiagonalScale_MPIAIJ,
2609                                        MatNorm_MPIAIJ,
2610                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2611                                        MatAssemblyEnd_MPIAIJ,
2612                                        MatSetOption_MPIAIJ,
2613                                        MatZeroEntries_MPIAIJ,
2614                                 /*24*/ MatZeroRows_MPIAIJ,
2615                                        NULL,
2616                                        NULL,
2617                                        NULL,
2618                                        NULL,
2619                                 /*29*/ MatSetUp_MPIAIJ,
2620                                        NULL,
2621                                        NULL,
2622                                        MatGetDiagonalBlock_MPIAIJ,
2623                                        NULL,
2624                                 /*34*/ MatDuplicate_MPIAIJ,
2625                                        NULL,
2626                                        NULL,
2627                                        NULL,
2628                                        NULL,
2629                                 /*39*/ MatAXPY_MPIAIJ,
2630                                        MatCreateSubMatrices_MPIAIJ,
2631                                        MatIncreaseOverlap_MPIAIJ,
2632                                        MatGetValues_MPIAIJ,
2633                                        MatCopy_MPIAIJ,
2634                                 /*44*/ MatGetRowMax_MPIAIJ,
2635                                        MatScale_MPIAIJ,
2636                                        MatShift_MPIAIJ,
2637                                        MatDiagonalSet_MPIAIJ,
2638                                        MatZeroRowsColumns_MPIAIJ,
2639                                 /*49*/ MatSetRandom_MPIAIJ,
2640                                        NULL,
2641                                        NULL,
2642                                        NULL,
2643                                        NULL,
2644                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2645                                        NULL,
2646                                        MatSetUnfactored_MPIAIJ,
2647                                        MatPermute_MPIAIJ,
2648                                        NULL,
2649                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2650                                        MatDestroy_MPIAIJ,
2651                                        MatView_MPIAIJ,
2652                                        NULL,
2653                                        NULL,
2654                                 /*64*/ NULL,
2655                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2656                                        NULL,
2657                                        NULL,
2658                                        NULL,
2659                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2660                                        MatGetRowMinAbs_MPIAIJ,
2661                                        NULL,
2662                                        NULL,
2663                                        NULL,
2664                                        NULL,
2665                                 /*75*/ MatFDColoringApply_AIJ,
2666                                        MatSetFromOptions_MPIAIJ,
2667                                        NULL,
2668                                        NULL,
2669                                        MatFindZeroDiagonals_MPIAIJ,
2670                                 /*80*/ NULL,
2671                                        NULL,
2672                                        NULL,
2673                                 /*83*/ MatLoad_MPIAIJ,
2674                                        MatIsSymmetric_MPIAIJ,
2675                                        NULL,
2676                                        NULL,
2677                                        NULL,
2678                                        NULL,
2679                                 /*89*/ NULL,
2680                                        NULL,
2681                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2682                                        NULL,
2683                                        NULL,
2684                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2685                                        NULL,
2686                                        NULL,
2687                                        NULL,
2688                                        MatBindToCPU_MPIAIJ,
2689                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2690                                        NULL,
2691                                        NULL,
2692                                        MatConjugate_MPIAIJ,
2693                                        NULL,
2694                                 /*104*/MatSetValuesRow_MPIAIJ,
2695                                        MatRealPart_MPIAIJ,
2696                                        MatImaginaryPart_MPIAIJ,
2697                                        NULL,
2698                                        NULL,
2699                                 /*109*/NULL,
2700                                        NULL,
2701                                        MatGetRowMin_MPIAIJ,
2702                                        NULL,
2703                                        MatMissingDiagonal_MPIAIJ,
2704                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2705                                        NULL,
2706                                        MatGetGhosts_MPIAIJ,
2707                                        NULL,
2708                                        NULL,
2709                                 /*119*/NULL,
2710                                        NULL,
2711                                        NULL,
2712                                        NULL,
2713                                        MatGetMultiProcBlock_MPIAIJ,
2714                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2715                                        MatGetColumnNorms_MPIAIJ,
2716                                        MatInvertBlockDiagonal_MPIAIJ,
2717                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2718                                        MatCreateSubMatricesMPI_MPIAIJ,
2719                                 /*129*/NULL,
2720                                        NULL,
2721                                        NULL,
2722                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2723                                        NULL,
2724                                 /*134*/NULL,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                        NULL,
2729                                 /*139*/MatSetBlockSizes_MPIAIJ,
2730                                        NULL,
2731                                        NULL,
2732                                        MatFDColoringSetUp_MPIXAIJ,
2733                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2734                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2735                                 /*145*/NULL,
2736                                        NULL,
2737                                        NULL
2738 };
2739 
2740 /* ----------------------------------------------------------------------------------------*/
2741 
2742 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2743 {
2744   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2749   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2754 {
2755   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2756   PetscErrorCode ierr;
2757 
2758   PetscFunctionBegin;
2759   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2760   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
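
/*
   A usage sketch (illustrative; A is an assumed assembled matrix): the
   store/retrieve pair supports repeatedly refilling a matrix whose nonzero
   pattern is fixed; MatStoreValues() requires that new nonzero locations be
   disallowed first.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     ... modify entries and use the matrix ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/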
2763 
2764 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2765 {
2766   Mat_MPIAIJ     *b;
2767   PetscErrorCode ierr;
2768   PetscMPIInt    size;
2769 
2770   PetscFunctionBegin;
2771   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2773   b = (Mat_MPIAIJ*)B->data;
2774 
2775 #if defined(PETSC_USE_CTABLE)
2776   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2777 #else
2778   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2779 #endif
2780   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2781   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2782   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2783 
2784   /* Because B may have been resized we simply destroy it and create a new one each time */
2785   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2786   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2787   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2788   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2789   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2790   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2791   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2792 
2793   if (!B->preallocated) {
2794     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2795     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2796     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2797     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2798     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2799   }
2800 
2801   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2802   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2803   B->preallocated  = PETSC_TRUE;
2804   B->was_assembled = PETSC_FALSE;
2805   B->assembled     = PETSC_FALSE;
2806   PetscFunctionReturn(0);
2807 }
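
/*
   A usage sketch (illustrative; A and the global size N are assumed
   caller-side names): preallocating a tridiagonal MPIAIJ matrix with at most
   3 entries per row in the diagonal block and 2 in the off-diagonal block.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,NULL,2,NULL);CHKERRQ(ierr);
*/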
2808 
2809 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2810 {
2811   Mat_MPIAIJ     *b;
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2816   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2817   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2818   b = (Mat_MPIAIJ*)B->data;
2819 
2820 #if defined(PETSC_USE_CTABLE)
2821   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2822 #else
2823   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2824 #endif
2825   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2826   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2827   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2828 
2829   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2830   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2831   B->preallocated  = PETSC_TRUE;
2832   B->was_assembled = PETSC_FALSE;
2833   B->assembled = PETSC_FALSE;
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2838 {
2839   Mat            mat;
2840   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2841   PetscErrorCode ierr;
2842 
2843   PetscFunctionBegin;
2844   *newmat = NULL;
2845   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2846   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2847   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2848   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2849   a       = (Mat_MPIAIJ*)mat->data;
2850 
2851   mat->factortype   = matin->factortype;
2852   mat->assembled    = matin->assembled;
2853   mat->insertmode   = NOT_SET_VALUES;
2854   mat->preallocated = matin->preallocated;
2855 
2856   a->size         = oldmat->size;
2857   a->rank         = oldmat->rank;
2858   a->donotstash   = oldmat->donotstash;
2859   a->roworiented  = oldmat->roworiented;
2860   a->rowindices   = NULL;
2861   a->rowvalues    = NULL;
2862   a->getrowactive = PETSC_FALSE;
2863 
2864   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2865   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2866 
2867   if (oldmat->colmap) {
2868 #if defined(PETSC_USE_CTABLE)
2869     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2870 #else
2871     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2872     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2873     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2874 #endif
2875   } else a->colmap = NULL;
2876   if (oldmat->garray) {
2877     PetscInt len;
2878     len  = oldmat->B->cmap->n;
2879     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2880     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2881     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2882   } else a->garray = NULL;
2883 
2884   /* It may happen that MatDuplicate is called with a non-assembled matrix;
2885      in fact, MatDuplicate only requires the matrix to be preallocated.
2886      This can happen inside DMCreateMatrix_Shell */
2887   if (oldmat->lvec) {
2888     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2889     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2890   }
2891   if (oldmat->Mvctx) {
2892     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2893     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2894   }
2895   if (oldmat->Mvctx_mpi1) {
2896     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2897     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2898   }
2899 
2900   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2901   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2902   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2903   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2904   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2905   *newmat = mat;
2906   PetscFunctionReturn(0);
2907 }
2908 
2909 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2910 {
2911   PetscBool      isbinary, ishdf5;
2912   PetscErrorCode ierr;
2913 
2914   PetscFunctionBegin;
2915   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2916   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2917   /* force binary viewer to load .info file if it has not yet done so */
2918   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2919   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2920   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2921   if (isbinary) {
2922     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2923   } else if (ishdf5) {
2924 #if defined(PETSC_HAVE_HDF5)
2925     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2926 #else
2927     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2928 #endif
2929   } else {
2930     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2931   }
2932   PetscFunctionReturn(0);
2933 }
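
/*
   A usage sketch (illustrative; assumes "matrix.dat" was previously written
   with MatView() on a binary viewer):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/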
2934 
2935 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2936 {
2937   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2938   PetscInt       *rowidxs,*colidxs;
2939   PetscScalar    *matvals;
2940   PetscErrorCode ierr;
2941 
2942   PetscFunctionBegin;
2943   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2944 
2945   /* read in matrix header */
2946   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2947   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2948   M  = header[1]; N = header[2]; nz = header[3];
2949   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2950   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2951   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2952 
2953   /* set block sizes from the viewer's .info file */
2954   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2955   /* set global sizes if not set already */
2956   if (mat->rmap->N < 0) mat->rmap->N = M;
2957   if (mat->cmap->N < 0) mat->cmap->N = N;
2958   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2959   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2960 
2961   /* check if the matrix sizes are correct */
2962   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2963   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2964 
2965   /* read in row lengths and build row indices */
2966   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2967   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2968   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
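  /* in-place prefix sum: turn the per-row lengths just read into CSR row offsets */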
2969   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2970   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2971   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
2972   /* read in column indices and matrix values */
2973   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2974   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2975   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2976   /* store matrix indices and values */
2977   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2978   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2979   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
2982 
2983 /* Not scalable because of ISAllGather() unless getting all columns. */
2984 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2985 {
2986   PetscErrorCode ierr;
2987   IS             iscol_local;
2988   PetscBool      isstride;
2989   PetscMPIInt    lisstride=0,gisstride;
2990 
2991   PetscFunctionBegin;
2992   /* check if we are grabbing all columns*/
2993   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2994 
2995   if (isstride) {
2996     PetscInt  start,len,mstart,mlen;
2997     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2998     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2999     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3000     if (mstart == start && mlen-mstart == len) lisstride = 1;
3001   }
3002 
3003   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3004   if (gisstride) {
3005     PetscInt N;
3006     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3007     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3008     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3009     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3010   } else {
3011     PetscInt cbs;
3012     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3013     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3014     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3015   }
3016 
3017   *isseq = iscol_local;
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 /*
3022  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3023  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3024 
3025  Input Parameters:
3026    mat - matrix
3027    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3028            i.e., mat->rstart <= isrow[i] < mat->rend
3029    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3030            i.e., mat->cstart <= iscol[i] < mat->cend
3031  Output Parameters:
3032    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3033    iscol_o - sequential column index set for retrieving mat->B
3034    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3035  */
3036 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3037 {
3038   PetscErrorCode ierr;
3039   Vec            x,cmap;
3040   const PetscInt *is_idx;
3041   PetscScalar    *xarray,*cmaparray;
3042   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3043   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3044   Mat            B=a->B;
3045   Vec            lvec=a->lvec,lcmap;
3046   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3047   MPI_Comm       comm;
3048   VecScatter     Mvctx=a->Mvctx;
3049 
3050   PetscFunctionBegin;
3051   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3052   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3053 
3054   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3055   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3056   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3057   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3058   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3059 
3060   /* Get start indices */
3061   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3062   isstart -= ncols;
3063   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3064 
3065   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3066   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3067   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3068   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3069   for (i=0; i<ncols; i++) {
3070     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3071     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3072     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3073   }
3074   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3075   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3076   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3077 
3078   /* Get iscol_d */
3079   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3080   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3081   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3082 
3083   /* Get isrow_d */
3084   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3085   rstart = mat->rmap->rstart;
3086   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3087   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3088   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3089   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3090 
3091   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3092   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3093   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3094 
3095   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3096   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3097   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3098 
3099   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3100 
3101   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3102   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3103 
3104   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3105   /* off-process column indices */
3106   count = 0;
3107   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3108   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3109 
3110   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3111   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3112   for (i=0; i<Bn; i++) {
3113     if (PetscRealPart(xarray[i]) > -1.0) {
3114       idx[count]     = i;                   /* local column index in off-diagonal part B */
3115       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3116       count++;
3117     }
3118   }
3119   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3120   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3121 
3122   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3123   /* cannot ensure iscol_o has same blocksize as iscol! */
3124 
3125   ierr = PetscFree(idx);CHKERRQ(ierr);
3126   *garray = cmap1;
3127 
3128   ierr = VecDestroy(&x);CHKERRQ(ierr);
3129   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3130   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3131   PetscFunctionReturn(0);
3132 }
3133 
3134 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3135 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3136 {
3137   PetscErrorCode ierr;
3138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3139   Mat            M = NULL;
3140   MPI_Comm       comm;
3141   IS             iscol_d,isrow_d,iscol_o;
3142   Mat            Asub = NULL,Bsub = NULL;
3143   PetscInt       n;
3144 
3145   PetscFunctionBegin;
3146   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3147 
3148   if (call == MAT_REUSE_MATRIX) {
3149     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3150     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3151     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3152 
3153     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3154     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3155 
3156     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3157     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3158 
3159     /* Update diagonal and off-diagonal portions of submat */
3160     asub = (Mat_MPIAIJ*)(*submat)->data;
3161     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3162     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3163     if (n) {
3164       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3165     }
3166     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3167     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3168 
3169   } else { /* call == MAT_INITIAL_MATRIX */
3170     const PetscInt *garray;
3171     PetscInt        BsubN;
3172 
3173     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3174     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3175 
3176     /* Create local submatrices Asub and Bsub */
3177     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3178     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3179 
3180     /* Create submatrix M */
3181     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3182 
3183     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3184     asub = (Mat_MPIAIJ*)M->data;
3185 
3186     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3187     n = asub->B->cmap->N;
3188     if (BsubN > n) {
3189       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3190       const PetscInt *idx;
3191       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3192       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3193 
3194       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3195       j = 0;
3196       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3197       for (i=0; i<n; i++) {
3198         if (j >= BsubN) break;
3199         while (subgarray[i] > garray[j]) j++;
3200 
3201         if (subgarray[i] == garray[j]) {
3202           idx_new[i] = idx[j++];
3203         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3204       }
3205       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3206 
3207       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3208       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3209 
3210     } else if (BsubN < n) {
3211       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3212     }
3213 
3214     ierr = PetscFree(garray);CHKERRQ(ierr);
3215     *submat = M;
3216 
3217     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3218     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3219     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3220 
3221     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3222     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3223 
3224     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3225     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3226   }
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3231 {
3232   PetscErrorCode ierr;
3233   IS             iscol_local=NULL,isrow_d;
3234   PetscInt       csize;
3235   PetscInt       n,i,j,start,end;
3236   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3237   MPI_Comm       comm;
3238 
3239   PetscFunctionBegin;
3240   /* If isrow has same processor distribution as mat,
3241      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3242   if (call == MAT_REUSE_MATRIX) {
3243     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3244     if (isrow_d) {
3245       sameRowDist  = PETSC_TRUE;
3246       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3247     } else {
3248       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3249       if (iscol_local) {
3250         sameRowDist  = PETSC_TRUE;
3251         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3252       }
3253     }
3254   } else {
3255     /* Check if isrow has same processor distribution as mat */
3256     sameDist[0] = PETSC_FALSE;
3257     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3258     if (!n) {
3259       sameDist[0] = PETSC_TRUE;
3260     } else {
3261       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3262       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3263       if (i >= start && j < end) {
3264         sameDist[0] = PETSC_TRUE;
3265       }
3266     }
3267 
3268     /* Check if iscol has same processor distribution as mat */
3269     sameDist[1] = PETSC_FALSE;
3270     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3271     if (!n) {
3272       sameDist[1] = PETSC_TRUE;
3273     } else {
3274       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3275       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3276       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3277     }
3278 
3279     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3280     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3281     sameRowDist = tsameDist[0];
3282   }
3283 
3284   if (sameRowDist) {
3285     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3286       /* isrow and iscol have same processor distribution as mat */
3287       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3288       PetscFunctionReturn(0);
3289     } else { /* sameRowDist */
3290       /* isrow has same processor distribution as mat */
3291       if (call == MAT_INITIAL_MATRIX) {
3292         PetscBool sorted;
3293         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3294         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3295         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3296         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3297 
3298         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3299         if (sorted) {
3300           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3301           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3302           PetscFunctionReturn(0);
3303         }
3304       } else { /* call == MAT_REUSE_MATRIX */
3305         IS    iscol_sub;
3306         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3307         if (iscol_sub) {
3308           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3309           PetscFunctionReturn(0);
3310         }
3311       }
3312     }
3313   }
3314 
3315   /* General case: iscol -> iscol_local which has global size of iscol */
3316   if (call == MAT_REUSE_MATRIX) {
3317     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3318     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3319   } else {
3320     if (!iscol_local) {
3321       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3322     }
3323   }
3324 
3325   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3326   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3327 
3328   if (call == MAT_INITIAL_MATRIX) {
3329     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3330     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3331   }
3332   PetscFunctionReturn(0);
3333 }
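
/*
   A usage sketch (illustrative; A is an assumed assembled MATMPIAIJ):
   extracting the locally owned even-numbered rows and all locally owned
   columns through the public MatCreateSubMatrix(), which dispatches to the
   routine above.

     PetscInt rstart,rend,cstart,cend;
     IS       isrow,iscol;
     Mat      Asub;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,(rend-rstart+1)/2,rstart,2,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
*/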
3334 
3335 /*@C
3336      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3337          and "off-diagonal" part of the matrix in CSR format.
3338 
3339    Collective
3340 
3341    Input Parameters:
3342 +  comm - MPI communicator
3343 .  A - "diagonal" portion of matrix
3344 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3345 -  garray - global index of B columns
3346 
3347    Output Parameter:
3348 .   mat - the matrix, with input A as its local diagonal matrix
3349    Level: advanced
3350 
3351    Notes:
3352        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3353        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3354 
3355 .seealso: MatCreateMPIAIJWithSplitArrays()
3356 @*/
3357 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3358 {
3359   PetscErrorCode ierr;
3360   Mat_MPIAIJ     *maij;
3361   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3362   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3363   PetscScalar    *oa=b->a;
3364   Mat            Bnew;
3365   PetscInt       m,n,N;
3366 
3367   PetscFunctionBegin;
3368   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3369   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3370   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3371   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3372   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3373   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3374 
3375   /* Get global columns of mat */
3376   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3377 
3378   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3379   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3380   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3381   maij = (Mat_MPIAIJ*)(*mat)->data;
3382 
3383   (*mat)->preallocated = PETSC_TRUE;
3384 
3385   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3386   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3387 
3388   /* Set A as diagonal portion of *mat */
3389   maij->A = A;
3390 
3391   nz = oi[m];
3392   for (i=0; i<nz; i++) {
3393     col   = oj[i];
3394     oj[i] = garray[col];
3395   }
3396 
3397   /* Set Bnew as off-diagonal portion of *mat */
3398   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3399   bnew        = (Mat_SeqAIJ*)Bnew->data;
3400   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3401   maij->B     = Bnew;
3402 
3403   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3404 
3405   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3406   b->free_a       = PETSC_FALSE;
3407   b->free_ij      = PETSC_FALSE;
3408   ierr = MatDestroy(&B);CHKERRQ(ierr);
3409 
3410   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3411   bnew->free_a       = PETSC_TRUE;
3412   bnew->free_ij      = PETSC_TRUE;
3413 
3414   /* condense columns of maij->B */
3415   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3416   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3417   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3418   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3419   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3420   PetscFunctionReturn(0);
3421 }
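
/*
   A usage sketch (illustrative; Aloc, Bloc, garray, and M are assumed
   caller-side names): Aloc and Bloc are SeqAIJ matrices owned by this process
   and garray[] maps each column of Bloc to its global column index; both
   input matrices are consumed by the call, as documented above.

     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&M);CHKERRQ(ierr);
*/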
3422 
3423 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3424 
3425 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3426 {
3427   PetscErrorCode ierr;
3428   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3429   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3430   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3431   Mat            M,Msub,B=a->B;
3432   MatScalar      *aa;
3433   Mat_SeqAIJ     *aij;
3434   PetscInt       *garray = a->garray,*colsub,Ncols;
3435   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3436   IS             iscol_sub,iscmap;
3437   const PetscInt *is_idx,*cmap;
3438   PetscBool      allcolumns=PETSC_FALSE;
3439   MPI_Comm       comm;
3440 
3441   PetscFunctionBegin;
3442   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3443 
3444   if (call == MAT_REUSE_MATRIX) {
3445     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3446     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3447     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3448 
3449     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3450     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3451 
3452     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3453     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3454 
3455     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3456 
3457   } else { /* call == MAT_INITIAL_MATRIX */
3458     PetscBool flg;
3459 
3460     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3461     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3462 
3463     /* (1) iscol -> nonscalable iscol_local */
3464     /* Check for special case: each processor gets entire matrix columns */
3465     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3466     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3467     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3468     if (allcolumns) {
3469       iscol_sub = iscol_local;
3470       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3471       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3472 
3473     } else {
3474       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3475       PetscInt *idx,*cmap1,k;
3476       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3477       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3478       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3479       count = 0;
3480       k     = 0;
3481       for (i=0; i<Ncols; i++) {
3482         j = is_idx[i];
3483         if (j >= cstart && j < cend) {
3484           /* diagonal part of mat */
3485           idx[count]     = j;
3486           cmap1[count++] = i; /* column index in submat */
3487         } else if (Bn) {
3488           /* off-diagonal part of mat */
3489           if (j == garray[k]) {
3490             idx[count]     = j;
3491             cmap1[count++] = i;  /* column index in submat */
3492           } else if (j > garray[k]) {
3493             while (j > garray[k] && k < Bn-1) k++;
3494             if (j == garray[k]) {
3495               idx[count]     = j;
3496               cmap1[count++] = i; /* column index in submat */
3497             }
3498           }
3499         }
3500       }
3501       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3502 
3503       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3504       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3505       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3506 
3507       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3508     }
3509 
3510     /* (3) Create sequential Msub */
3511     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3512   }
3513 
3514   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3515   aij  = (Mat_SeqAIJ*)(Msub)->data;
3516   ii   = aij->i;
3517   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3518 
3519   /*
3520       m - number of local rows
3521       Ncols - number of columns (same on all processors)
3522       rstart - first row in new global matrix generated
3523   */
3524   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3525 
3526   if (call == MAT_INITIAL_MATRIX) {
3527     /* (4) Create parallel newmat */
3528     PetscMPIInt    rank,size;
3529     PetscInt       csize;
3530 
3531     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3532     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3533 
3534     /*
3535         Determine the number of non-zeros in the diagonal and off-diagonal
3536         portions of the matrix in order to do correct preallocation
3537     */
3538 
3539     /* first get start and end of "diagonal" columns */
3540     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3541     if (csize == PETSC_DECIDE) {
3542       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3543       if (mglobal == Ncols) { /* square matrix */
3544         nlocal = m;
3545       } else {
3546         nlocal = Ncols/size + ((Ncols % size) > rank);
3547       }
3548     } else {
3549       nlocal = csize;
3550     }
3551     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3552     rstart = rend - nlocal;
3553     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3554 
3555     /* next, compute all the lengths */
3556     jj    = aij->j;
3557     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3558     olens = dlens + m;
3559     for (i=0; i<m; i++) {
3560       jend = ii[i+1] - ii[i];
3561       olen = 0;
3562       dlen = 0;
3563       for (j=0; j<jend; j++) {
3564         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3565         else dlen++;
3566         jj++;
3567       }
3568       olens[i] = olen;
3569       dlens[i] = dlen;
3570     }
3571 
3572     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3573     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3574 
3575     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3576     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3577     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3578     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3579     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3580     ierr = PetscFree(dlens);CHKERRQ(ierr);
3581 
3582   } else { /* call == MAT_REUSE_MATRIX */
3583     M    = *newmat;
3584     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3585     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3586     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3587     /*
3588          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3589        rather than the slower MatSetValues().
3590     */
3591     M->was_assembled = PETSC_TRUE;
3592     M->assembled     = PETSC_FALSE;
3593   }
3594 
3595   /* (5) Set values of Msub to *newmat */
3596   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3597   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3598 
3599   jj   = aij->j;
3600   aa   = aij->a;
3601   for (i=0; i<m; i++) {
3602     row = rstart + i;
3603     nz  = ii[i+1] - ii[i];
3604     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3605     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3606     jj += nz; aa += nz;
3607   }
3608   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3609 
3610   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3612 
3613   ierr = PetscFree(colsub);CHKERRQ(ierr);
3614 
3615   /* save Msub, iscol_sub and iscmap used in processor for next request */
3616   if (call ==  MAT_INITIAL_MATRIX) {
3617     *newmat = M;
3618     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3619     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3620 
3621     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3622     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3623 
3624     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3625     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3626 
3627     if (iscol_local) {
3628       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3629       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3630     }
3631   }
3632   PetscFunctionReturn(0);
3633 }
3634 
3635 /*
3636     Not great since it makes two copies of the submatrix: first a SeqAIJ
3637   locally, and then the end result by concatenating the local matrices.
3638   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3639 
3640   Note: This requires a sequential iscol with all indices.
3641 */
3642 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3643 {
3644   PetscErrorCode ierr;
3645   PetscMPIInt    rank,size;
3646   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3647   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3648   Mat            M,Mreuse;
3649   MatScalar      *aa,*vwork;
3650   MPI_Comm       comm;
3651   Mat_SeqAIJ     *aij;
3652   PetscBool      colflag,allcolumns=PETSC_FALSE;
3653 
3654   PetscFunctionBegin;
3655   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3656   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3657   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3658 
3659   /* Check for special case: each processor gets entire matrix columns */
3660   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3661   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3662   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3663   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3664 
3665   if (call ==  MAT_REUSE_MATRIX) {
3666     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3667     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3668     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3669   } else {
3670     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3671   }
3672 
3673   /*
3674       m - number of local rows
3675       n - number of columns (same on all processors)
3676       rstart - first row in new global matrix generated
3677   */
3678   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3679   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3680   if (call == MAT_INITIAL_MATRIX) {
3681     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3682     ii  = aij->i;
3683     jj  = aij->j;
3684 
3685     /*
3686         Determine the number of non-zeros in the diagonal and off-diagonal
3687         portions of the matrix in order to do correct preallocation
3688     */
3689 
3690     /* first get start and end of "diagonal" columns */
3691     if (csize == PETSC_DECIDE) {
3692       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3693       if (mglobal == n) { /* square matrix */
3694         nlocal = m;
3695       } else {
3696         nlocal = n/size + ((n % size) > rank);
3697       }
3698     } else {
3699       nlocal = csize;
3700     }
3701     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3702     rstart = rend - nlocal;
3703     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3704 
3705     /* next, compute all the lengths */
3706     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3707     olens = dlens + m;
3708     for (i=0; i<m; i++) {
3709       jend = ii[i+1] - ii[i];
3710       olen = 0;
3711       dlen = 0;
3712       for (j=0; j<jend; j++) {
3713         if (*jj < rstart || *jj >= rend) olen++;
3714         else dlen++;
3715         jj++;
3716       }
3717       olens[i] = olen;
3718       dlens[i] = dlen;
3719     }
3720     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3721     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3722     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3723     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3724     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3725     ierr = PetscFree(dlens);CHKERRQ(ierr);
3726   } else {
3727     PetscInt ml,nl;
3728 
3729     M    = *newmat;
3730     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3731     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3732     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3733     /*
3734          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3735        rather than the slower MatSetValues().
3736     */
3737     M->was_assembled = PETSC_TRUE;
3738     M->assembled     = PETSC_FALSE;
3739   }
3740   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3741   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3742   ii   = aij->i;
3743   jj   = aij->j;
3744   aa   = aij->a;
3745   for (i=0; i<m; i++) {
3746     row   = rstart + i;
3747     nz    = ii[i+1] - ii[i];
3748     cwork = jj;     jj += nz;
3749     vwork = aa;     aa += nz;
3750     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3751   }
3752 
3753   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3754   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755   *newmat = M;
3756 
3757   /* save submatrix used in processor for next request */
3758   if (call ==  MAT_INITIAL_MATRIX) {
3759     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3760     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3761   }
3762   PetscFunctionReturn(0);
3763 }
3764 
3765 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3766 {
3767   PetscInt       m,cstart, cend,j,nnz,i,d;
3768   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3769   const PetscInt *JJ;
3770   PetscErrorCode ierr;
3771   PetscBool      nooffprocentries;
3772 
3773   PetscFunctionBegin;
3774   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3775 
3776   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3777   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3778   m      = B->rmap->n;
3779   cstart = B->cmap->rstart;
3780   cend   = B->cmap->rend;
3781   rstart = B->rmap->rstart;
3782 
3783   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3784 
3785   if (PetscDefined(USE_DEBUG)) {
3786     for (i=0; i<m; i++) {
3787       nnz = Ii[i+1]- Ii[i];
3788       JJ  = J + Ii[i];
3789       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3790       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3791       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3792     }
3793   }
3794 
3795   for (i=0; i<m; i++) {
3796     nnz     = Ii[i+1]- Ii[i];
3797     JJ      = J + Ii[i];
3798     nnz_max = PetscMax(nnz_max,nnz);
3799     d       = 0;
3800     for (j=0; j<nnz; j++) {
3801       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3802     }
3803     d_nnz[i] = d;
3804     o_nnz[i] = nnz - d;
3805   }
3806   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3807   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3808 
3809   for (i=0; i<m; i++) {
3810     ii   = i + rstart;
3811     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3812   }
3813   nooffprocentries    = B->nooffprocentries;
3814   B->nooffprocentries = PETSC_TRUE;
3815   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3816   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3817   B->nooffprocentries = nooffprocentries;
3818 
3819   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3820   PetscFunctionReturn(0);
3821 }
3822 
3823 /*@
3824    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3825    (the default parallel PETSc format).
3826 
3827    Collective
3828 
3829    Input Parameters:
3830 +  B - the matrix
3831 .  i - the indices into j for the start of each local row (starts with zero)
3832 .  j - the column indices for each local row (starts with zero)
3833 -  v - optional values in the matrix
3834 
3835    Level: developer
3836 
3837    Notes:
3838        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3839      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3840      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3841 
3842       The i and j indices are 0 based, and the i indices are offsets into the local j array.
3843 
3844        The format used for the sparse matrix input is equivalent to a
3845     row-major ordering, i.e., for the following matrix, the input data expected is
3846     as shown below:
3847 
3848 $        1 0 0
3849 $        2 0 3     P0
3850 $       -------
3851 $        4 5 6     P1
3852 $
3853 $     Process0 [P0]: rows_owned=[0,1]
3854 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3855 $        j =  {0,0,2}  [size = 3]
3856 $        v =  {1,2,3}  [size = 3]
3857 $
3858 $     Process1 [P1]: rows_owned=[2]
3859 $        i =  {0,3}    [size = nrow+1  = 1+1]
3860 $        j =  {0,1,2}  [size = 3]
3861 $        v =  {4,5,6}  [size = 3]
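
   For instance, process 0 above could pass its arrays as follows (a minimal sketch;
   B is assumed to have been created as a MATMPIAIJ with 2 local rows):
.vb
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve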
3862 
3863 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3864           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3865 @*/
3866 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3867 {
3868   PetscErrorCode ierr;
3869 
3870   PetscFunctionBegin;
3871   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3872   PetscFunctionReturn(0);
3873 }
3874 
3875 /*@C
3876    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3877    (the default parallel PETSc format).  For good matrix assembly performance
3878    the user should preallocate the matrix storage by setting the parameters
3879    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3880    performance can be increased by more than a factor of 50.
3881 
3882    Collective
3883 
3884    Input Parameters:
3885 +  B - the matrix
3886 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3887            (same value is used for all local rows)
3888 .  d_nnz - array containing the number of nonzeros in the various rows of the
3889            DIAGONAL portion of the local submatrix (possibly different for each row)
3890            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3891            The size of this array is equal to the number of local rows, i.e 'm'.
3892            For matrices that will be factored, you must leave room for (and set)
3893            the diagonal entry even if it is zero.
3894 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3895            submatrix (same value is used for all local rows).
3896 -  o_nnz - array containing the number of nonzeros in the various rows of the
3897            OFF-DIAGONAL portion of the local submatrix (possibly different for
3898            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3899            structure. The size of this array is equal to the number
3900            of local rows, i.e 'm'.
3901 
3902    If the *_nnz parameter is given then the *_nz parameter is ignored.
3903 
3904    The AIJ format (also called the Yale sparse matrix format or
3905    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3906    storage.  The stored row and column indices begin with zero.
3907    See Users-Manual: ch_mat for details.
3908 
3909    The parallel matrix is partitioned such that the first m0 rows belong to
3910    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3911    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3912 
3913    The DIAGONAL portion of the local submatrix of a processor can be defined
3914    as the submatrix obtained by extracting the part corresponding to
3915    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3916    first row that belongs to the processor, r2 is the last row belonging to
3917    this processor, and c1-c2 is the range of indices of the local part of a
3918    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3919    common case of a square matrix, the row and column ranges are the same and
3920    the DIAGONAL part is also square. The remaining portion of the local
3921    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3922 
3923    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3924 
3925    You can call MatGetInfo() to get information on how effective the preallocation was,
3926    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3927    You can also run with the option -info and look for messages containing the string
3928    malloc to see if additional memory allocation was needed.
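
   For example (a sketch querying the local information):
.vb
      MatInfo info;
      MatGetInfo(A,MAT_LOCAL,&info);
      /* inspect info.mallocs, info.nz_allocated, info.nz_used, info.nz_unneeded */
.ve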
3929 
3930    Example usage:
3931 
3932    Consider the following 8x8 matrix with 34 non-zero values, that is
3933    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3934    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3935    as follows:
3936 
3937 .vb
3938             1  2  0  |  0  3  0  |  0  4
3939     Proc0   0  5  6  |  7  0  0  |  8  0
3940             9  0 10  | 11  0  0  | 12  0
3941     -------------------------------------
3942            13  0 14  | 15 16 17  |  0  0
3943     Proc1   0 18  0  | 19 20 21  |  0  0
3944             0  0  0  | 22 23  0  | 24  0
3945     -------------------------------------
3946     Proc2  25 26 27  |  0  0 28  | 29  0
3947            30  0  0  | 31 32 33  |  0 34
3948 .ve
3949 
3950    This can be represented as a collection of submatrices as:
3951 
3952 .vb
3953       A B C
3954       D E F
3955       G H I
3956 .ve
3957 
3958    Where the submatrices A,B,C are owned by proc0, D,E,F are
3959    owned by proc1, G,H,I are owned by proc2.
3960 
3961    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3962    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3963    The 'M','N' parameters are 8,8, and have the same values on all procs.
3964 
3965    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3966    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3967    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3968    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3969    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3970    matrix, and [DF] as another SeqAIJ matrix.
3971 
3972    When d_nz, o_nz parameters are specified, d_nz storage elements are
3973    allocated for every row of the local diagonal submatrix, and o_nz
3974    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3975    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3976    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3977    In this case, the values of d_nz,o_nz are:
3978 .vb
3979      proc0 : dnz = 2, o_nz = 2
3980      proc1 : dnz = 3, o_nz = 2
3981      proc2 : dnz = 1, o_nz = 4
3982 .ve
3983    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3984    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3985    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3986    34 values.
3987 
3988    When d_nnz, o_nnz parameters are specified, the storage is specified
3989    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3990    In the above case the values for d_nnz,o_nnz are:
3991 .vb
3992      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3993      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3994      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3995 .ve
3996    Here the space allocated is the sum of all the above values, i.e., 34, and
3997    hence the preallocation is perfect.
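
   For instance, process 0 above would preallocate with (a sketch; the array
   contents are those listed above):
.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve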
3998 
3999    Level: intermediate
4000 
4001 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4002           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4003 @*/
4004 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4005 {
4006   PetscErrorCode ierr;
4007 
4008   PetscFunctionBegin;
4009   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4010   PetscValidType(B,1);
4011   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4012   PetscFunctionReturn(0);
4013 }
4014 
4015 /*@
4016      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4017          rows in standard CSR format.
4018 
4019    Collective
4020 
4021    Input Parameters:
4022 +  comm - MPI communicator
4023 .  m - number of local rows (Cannot be PETSC_DECIDE)
4024 .  n - This value should be the same as the local size used in creating the
4025        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4026        calculated if N is given). For square matrices n is almost always m.
4027 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4028 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4029 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4030 .   j - column indices
4031 -   a - matrix values
4032 
4033    Output Parameter:
4034 .   mat - the matrix
4035 
4036    Level: intermediate
4037 
4038    Notes:
4039        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4040      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4041      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4042 
4043       The i and j indices are 0 based, and the i indices are offsets into the local j array.
4044 
4045        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4046 
4047        The format used for the sparse matrix input is equivalent to a
4048     row-major ordering, i.e., for the following matrix, the input data expected is
4049     as shown below:
4050 
4051 $        1 0 0
4052 $        2 0 3     P0
4053 $       -------
4054 $        4 5 6     P1
4055 $
4056 $     Process0 [P0]: rows_owned=[0,1]
4057 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4058 $        j =  {0,0,2}  [size = 3]
4059 $        v =  {1,2,3}  [size = 3]
4060 $
4061 $     Process1 [P1]: rows_owned=[2]
4062 $        i =  {0,3}    [size = nrow+1  = 1+1]
4063 $        j =  {0,1,2}  [size = 3]
4064 $        v =  {4,5,6}  [size = 3]
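
   A sketch of the call process 0 would make for the example above (comm is
   assumed to be the communicator shared by both processes):
.vb
      Mat         A;
      PetscInt    i[] = {0,1,3},j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve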
4065 
4066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4067           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4068 @*/
4069 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4070 {
4071   PetscErrorCode ierr;
4072 
4073   PetscFunctionBegin;
4074   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4075   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4076   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4077   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4078   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4079   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4080   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4081   PetscFunctionReturn(0);
4082 }
4083 
4084 /*@
4085      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4086          rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created.
4087 
4088    Collective
4089 
4090    Input Parameters:
4091 +  mat - the matrix
4092 .  m - number of local rows (Cannot be PETSC_DECIDE)
4093 .  n - This value should be the same as the local size used in creating the
4094        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4095        calculated if N is given). For square matrices n is almost always m.
4096 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4097 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4098 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4099 .  J - column indices
4100 -  v - matrix values
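
   For example, to refresh the numerical values of a matrix built with
   MatCreateMPIAIJWithArrays() while keeping the same sparsity pattern (a sketch;
   vnew is an illustrative array holding the new values):
.vb
      MatUpdateMPIAIJWithArrays(A,m,n,M,N,i,j,vnew);
.ve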
4101 
4102    Level: intermediate
4103 
4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4105           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4106 @*/
4107 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4108 {
4109   PetscErrorCode ierr;
4110   PetscInt       cstart,nnz,i,j;
4111   PetscInt       *ld;
4112   PetscBool      nooffprocentries;
4113   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4114   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4115   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4116   const PetscInt *Adi = Ad->i;
4117   PetscInt       ldi,Iii,md;
4118 
4119   PetscFunctionBegin;
4120   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4121   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4122   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change between calls to MatUpdateMPIAIJWithArrays()");
4123   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change between calls to MatUpdateMPIAIJWithArrays()");
4124 
4125   cstart = mat->cmap->rstart;
4126   if (!Aij->ld) {
4127     /* count number of entries below block diagonal */
4128     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4129     Aij->ld = ld;
4130     for (i=0; i<m; i++) {
4131       nnz  = Ii[i+1]- Ii[i];
4132       j     = 0;
4133       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before reading J[j] to avoid running past the row */
4134       J    += nnz;
4135       ld[i] = j;
4136     }
4137   } else {
4138     ld = Aij->ld;
4139   }
4140 
4141   for (i=0; i<m; i++) {
4142     nnz  = Ii[i+1]- Ii[i];
4143     Iii  = Ii[i];
4144     ldi  = ld[i];
4145     md   = Adi[i+1]-Adi[i];
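    /* each CSR row of v is laid out as [ entries left of the diagonal block | diagonal block | entries right of it ];
       the diagonal-block piece goes into ad, the two off-diagonal pieces into ao */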
4146     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4147     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4148     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4149     ad  += md;
4150     ao  += nnz - md;
4151   }
4152   nooffprocentries      = mat->nooffprocentries;
4153   mat->nooffprocentries = PETSC_TRUE;
4154   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4155   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4156   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4157   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4158   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4159   mat->nooffprocentries = nooffprocentries;
4160   PetscFunctionReturn(0);
4161 }
4162 
4163 /*@C
4164    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4165    (the default parallel PETSc format).  For good matrix assembly performance
4166    the user should preallocate the matrix storage by setting the parameters
4167    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4168    performance can be increased by more than a factor of 50.
4169 
4170    Collective
4171 
4172    Input Parameters:
4173 +  comm - MPI communicator
4174 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4175            This value should be the same as the local size used in creating the
4176            y vector for the matrix-vector product y = Ax.
4177 .  n - This value should be the same as the local size used in creating the
4178        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4179        calculated if N is given) For square matrices n is almost always m.
4180 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4181 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4182 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4183            (same value is used for all local rows)
4184 .  d_nnz - array containing the number of nonzeros in the various rows of the
4185            DIAGONAL portion of the local submatrix (possibly different for each row)
4186            or NULL, if d_nz is used to specify the nonzero structure.
4187            The size of this array is equal to the number of local rows, i.e 'm'.
4188 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4189            submatrix (same value is used for all local rows).
4190 -  o_nnz - array containing the number of nonzeros in the various rows of the
4191            OFF-DIAGONAL portion of the local submatrix (possibly different for
4192            each row) or NULL, if o_nz is used to specify the nonzero
4193            structure. The size of this array is equal to the number
4194            of local rows, i.e 'm'.
4195 
4196    Output Parameter:
4197 .  A - the matrix
4198 
4199    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4200    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4201    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4202 
4203    Notes:
4204    If the *_nnz parameter is given then the *_nz parameter is ignored.
4205 
4206    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4207    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4208    storage requirements for this matrix.
4209 
4210    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4211    processor then it must be used on all processors that share the object for
4212    that argument.
4213 
4214    The user MUST specify either the local or global matrix dimensions
4215    (possibly both).
4216 
4217    The parallel matrix is partitioned across processors such that the
4218    first m0 rows belong to process 0, the next m1 rows belong to
4219    process 1, the next m2 rows belong to process 2, etc., where
4220    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4221    values corresponding to an [m x N] submatrix.
4222 
4223    The columns are logically partitioned with the n0 columns belonging
4224    to 0th partition, the next n1 columns belonging to the next
4225    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4226 
4227    The DIAGONAL portion of the local submatrix on any given processor
4228    is the submatrix corresponding to the rows m and columns n
4229    owned by the given processor, i.e., the diagonal matrix on
4230    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4231    etc. The remaining portion of the local submatrix [m x (N-n)]
4232    constitutes the OFF-DIAGONAL portion. The example below better
4233    illustrates this concept.
4234 
4235    For a square global matrix we define each processor's diagonal portion
4236    to be its local rows and the corresponding columns (a square submatrix);
4237    each processor's off-diagonal portion encompasses the remainder of the
4238    local matrix (a rectangular submatrix).
4239 
4240    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4241 
4242    When calling this routine with a single process communicator, a matrix of
4243    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4244    type of communicator, use the construction mechanism
4245 .vb
4246      MatCreate(...,&A);
4247      MatSetType(A,MATMPIAIJ);
4248      MatSetSizes(A, m,n,M,N);
4249      MatMPIAIJSetPreallocation(A,...);
4250 .ve
4253 
4254    By default, this format uses inodes (identical nodes) when possible.
4255    We search for consecutive rows with the same nonzero structure, thereby
4256    reusing matrix information to achieve increased efficiency.
4257 
4258    Options Database Keys:
4259 +  -mat_no_inode  - Do not use inodes
4260 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4261 
4264    Example usage:
4265 
4266    Consider the following 8x8 matrix with 34 non-zero values, that is
4267    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4268    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4269    as follows
4270 
4271 .vb
4272             1  2  0  |  0  3  0  |  0  4
4273     Proc0   0  5  6  |  7  0  0  |  8  0
4274             9  0 10  | 11  0  0  | 12  0
4275     -------------------------------------
4276            13  0 14  | 15 16 17  |  0  0
4277     Proc1   0 18  0  | 19 20 21  |  0  0
4278             0  0  0  | 22 23  0  | 24  0
4279     -------------------------------------
4280     Proc2  25 26 27  |  0  0 28  | 29  0
4281            30  0  0  | 31 32 33  |  0 34
4282 .ve
4283 
4284    This can be represented as a collection of submatrices as
4285 
4286 .vb
4287       A B C
4288       D E F
4289       G H I
4290 .ve
4291 
4292    Where the submatrices A,B,C are owned by proc0, D,E,F are
4293    owned by proc1, G,H,I are owned by proc2.
4294 
4295    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4296    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4297    The 'M','N' parameters are 8,8, and have the same values on all procs.
4298 
4299    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4300    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4301    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4302    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4303    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4304    matrix, and [DF] as another SeqAIJ matrix.
4305 
4306    When d_nz, o_nz parameters are specified, d_nz storage elements are
4307    allocated for every row of the local diagonal submatrix, and o_nz
4308    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4309    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4310    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4311    In this case, the values of d_nz,o_nz are
4312 .vb
4313      proc0 : dnz = 2, o_nz = 2
4314      proc1 : dnz = 3, o_nz = 2
4315      proc2 : dnz = 1, o_nz = 4
4316 .ve
4317    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4318    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4319    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4320    34 values.
4321 
4322    When d_nnz, o_nnz parameters are specified, the storage is specified
4323    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4324    In the above case the values for d_nnz,o_nnz are
4325 .vb
4326      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4327      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4328      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4329 .ve
4330    Here the space allocated is the sum of all the above values, i.e., 34, and
4331    hence the preallocation is perfect.
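
   With these, process 0 in the example above could create the matrix directly
   (a sketch; comm is the shared communicator):
.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve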
4332 
4333    Level: intermediate
4334 
4335 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4336           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4337 @*/
4338 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4339 {
4340   PetscErrorCode ierr;
4341   PetscMPIInt    size;
4342 
4343   PetscFunctionBegin;
4344   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4345   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4346   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4347   if (size > 1) {
4348     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4349     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4350   } else {
4351     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4352     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4353   }
4354   PetscFunctionReturn(0);
4355 }
4356 
4357 /*@C
4358   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4359 
4360   Not collective
4361 
4362   Input Parameter:
4363 . A - The MPIAIJ matrix
4364 
4365   Output Parameters:
4366 + Ad - The local diagonal block as a SeqAIJ matrix
4367 . Ao - The local off-diagonal block as a SeqAIJ matrix
4368 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4369 
4370   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4371   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4372   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4373   local column numbers to global column numbers in the original matrix.
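
  A minimal sketch of mapping a local column of Ao back to the global numbering:
.vb
    Mat            Ad,Ao;
    const PetscInt *colmap;
    MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
    /* the global column index of local column c of Ao is colmap[c] */
.ve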
4374 
4375   Level: intermediate
4376 
4377 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4378 @*/
4379 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4380 {
4381   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4382   PetscBool      flg;
4383   PetscErrorCode ierr;
4384 
4385   PetscFunctionBegin;
4386   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4387   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4388   if (Ad)     *Ad     = a->A;
4389   if (Ao)     *Ao     = a->B;
4390   if (colmap) *colmap = a->garray;
4391   PetscFunctionReturn(0);
4392 }
4393 
4394 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4395 {
4396   PetscErrorCode ierr;
4397   PetscInt       m,N,i,rstart,nnz,Ii;
4398   PetscInt       *indx;
4399   PetscScalar    *values;
4400 
4401   PetscFunctionBegin;
4402   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4403   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4404     PetscInt       *dnz,*onz,sum,bs,cbs;
4405 
4406     if (n == PETSC_DECIDE) {
4407       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4408     }
4409     /* Check sum(n) = N */
4410     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4411     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4412 
4413     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4414     rstart -= m;
4415 
4416     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4417     for (i=0; i<m; i++) {
4418       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4419       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4420       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4421     }
4422 
4423     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4424     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4425     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4426     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4427     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4428     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4429     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4430     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4431   }
4432 
4433   /* numeric phase */
4434   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4435   for (i=0; i<m; i++) {
4436     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4437     Ii   = i + rstart;
4438     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4439     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4440   }
4441   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4442   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4443   PetscFunctionReturn(0);
4444 }
4445 
4446 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4447 {
4448   PetscErrorCode    ierr;
4449   PetscMPIInt       rank;
4450   PetscInt          m,N,i,rstart,nnz;
4451   size_t            len;
4452   const PetscInt    *indx;
4453   PetscViewer       out;
4454   char              *name;
4455   Mat               B;
4456   const PetscScalar *values;
4457 
4458   PetscFunctionBegin;
4459   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4460   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4461   /* Should this be the type of the diagonal block of A? */
4462   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4463   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4464   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4465   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4466   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4467   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4468   for (i=0; i<m; i++) {
4469     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4470     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4471     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4472   }
4473   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4474   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4475 
4476   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4477   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4478   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4479   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4480   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4481   ierr = PetscFree(name);CHKERRQ(ierr);
4482   ierr = MatView(B,out);CHKERRQ(ierr);
4483   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4484   ierr = MatDestroy(&B);CHKERRQ(ierr);
4485   PetscFunctionReturn(0);
4486 }
4487 
4488 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4489 {
4490   PetscErrorCode      ierr;
4491   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4492 
4493   PetscFunctionBegin;
4494   if (!merge) PetscFunctionReturn(0);
4495   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4496   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4497   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4498   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4499   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4500   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4501   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4502   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4503   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4504   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4505   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4506   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4507   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4508   ierr = PetscFree(merge);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #include <../src/mat/utils/freespace.h>
4513 #include <petscbt.h>
4514 
4515 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4516 {
4517   PetscErrorCode      ierr;
4518   MPI_Comm            comm;
4519   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4520   PetscMPIInt         size,rank,taga,*len_s;
4521   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4522   PetscInt            proc,m;
4523   PetscInt            **buf_ri,**buf_rj;
4524   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4525   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4526   MPI_Request         *s_waits,*r_waits;
4527   MPI_Status          *status;
4528   MatScalar           *aa=a->a;
4529   MatScalar           **abuf_r,*ba_i;
4530   Mat_Merge_SeqsToMPI *merge;
4531   PetscContainer      container;
4532 
4533   PetscFunctionBegin;
4534   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4535   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4536 
4537   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4538   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4539 
4540   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4541   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4542   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4543 
4544   bi     = merge->bi;
4545   bj     = merge->bj;
4546   buf_ri = merge->buf_ri;
4547   buf_rj = merge->buf_rj;
4548 
4549   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4550   owners = merge->rowmap->range;
4551   len_s  = merge->len_s;
4552 
4553   /* send and recv matrix values */
4554   /*-----------------------------*/
4555   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4556   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4557 
4558   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4559   for (proc=0,k=0; proc<size; proc++) {
4560     if (!len_s[proc]) continue;
4561     i    = owners[proc];
4562     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4563     k++;
4564   }
4565 
4566   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4567   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4568   ierr = PetscFree(status);CHKERRQ(ierr);
4569 
4570   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4571   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4572 
4573   /* insert mat values of mpimat */
4574   /*----------------------------*/
4575   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4576   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4577 
4578   for (k=0; k<merge->nrecv; k++) {
4579     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4580     nrows       = *(buf_ri_k[k]);
4581     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4582     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4583   }
4584 
4585   /* set values of ba */
4586   m = merge->rowmap->n;
4587   for (i=0; i<m; i++) {
4588     arow = owners[rank] + i;
4589     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4590     bnzi = bi[i+1] - bi[i];
4591     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4592 
4593     /* add local non-zero vals of this proc's seqmat into ba */
4594     anzi   = ai[arow+1] - ai[arow];
4595     aj     = a->j + ai[arow];
4596     aa     = a->a + ai[arow];
4597     nextaj = 0;
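    /* two-pointer merge: bj_i is a sorted superset of aj, so advance j until the columns match */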
4598     for (j=0; nextaj<anzi; j++) {
4599       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4600         ba_i[j] += aa[nextaj++];
4601       }
4602     }
4603 
4604     /* add received vals into ba */
4605     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4606       /* i-th row */
4607       if (i == *nextrow[k]) {
4608         anzi   = *(nextai[k]+1) - *nextai[k];
4609         aj     = buf_rj[k] + *(nextai[k]);
4610         aa     = abuf_r[k] + *(nextai[k]);
4611         nextaj = 0;
4612         for (j=0; nextaj<anzi; j++) {
4613           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4614             ba_i[j] += aa[nextaj++];
4615           }
4616         }
4617         nextrow[k]++; nextai[k]++;
4618       }
4619     }
4620     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4621   }
4622   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4623   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4624 
4625   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4626   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4627   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4628   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4629   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4630   PetscFunctionReturn(0);
4631 }
4632 
4633 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4634 {
4635   PetscErrorCode      ierr;
4636   Mat                 B_mpi;
4637   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4638   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4639   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4640   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4641   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4642   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4643   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4644   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4645   MPI_Status          *status;
4646   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4647   PetscBT             lnkbt;
4648   Mat_Merge_SeqsToMPI *merge;
4649   PetscContainer      container;
4650 
4651   PetscFunctionBegin;
4652   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4653 
4654   /* make sure it is a PETSc comm */
4655   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4657   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4658 
4659   ierr = PetscNew(&merge);CHKERRQ(ierr);
4660   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4661 
4662   /* determine row ownership */
4663   /*---------------------------------------------------------*/
4664   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4665   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4666   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4667   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4669   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4670   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4671 
4672   m      = merge->rowmap->n;
4673   owners = merge->rowmap->range;
4674 
4675   /* determine the number of messages to send, their lengths */
4676   /*---------------------------------------------------------*/
4677   len_s = merge->len_s;
4678 
4679   len          = 0; /* length of buf_si[] */
4680   merge->nsend = 0;
4681   for (proc=0; proc<size; proc++) {
4682     len_si[proc] = 0;
4683     if (proc == rank) {
4684       len_s[proc] = 0;
4685     } else {
4686       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4687       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4688     }
4689     if (len_s[proc]) {
4690       merge->nsend++;
4691       nrows = 0;
4692       for (i=owners[proc]; i<owners[proc+1]; i++) {
4693         if (ai[i+1] > ai[i]) nrows++;
4694       }
4695       len_si[proc] = 2*(nrows+1);
4696       len         += len_si[proc];
4697     }
4698   }
4699 
4700   /* determine the number and length of messages to receive for ij-structure */
4701   /*-------------------------------------------------------------------------*/
4702   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4703   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4704 
4705   /* post the Irecv of j-structure */
4706   /*-------------------------------*/
4707   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4708   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4709 
4710   /* post the Isend of j-structure */
4711   /*--------------------------------*/
4712   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4713 
4714   for (proc=0, k=0; proc<size; proc++) {
4715     if (!len_s[proc]) continue;
4716     i    = owners[proc];
4717     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4718     k++;
4719   }
4720 
4721   /* receives and sends of j-structure are complete */
4722   /*------------------------------------------------*/
4723   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4724   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4725 
4726   /* send and recv i-structure */
4727   /*---------------------------*/
4728   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4729   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4730 
4731   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4732   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4733   for (proc=0,k=0; proc<size; proc++) {
4734     if (!len_s[proc]) continue;
4735     /* form outgoing message for i-structure:
4736          buf_si[0]:                 nrows to be sent
4737                [1:nrows]:           row index (global)
4738                [nrows+1:2*nrows+1]: i-structure index
4739     */
4740     /*-------------------------------------------*/
4741     nrows       = len_si[proc]/2 - 1;
4742     buf_si_i    = buf_si + nrows+1;
4743     buf_si[0]   = nrows;
4744     buf_si_i[0] = 0;
4745     nrows       = 0;
4746     for (i=owners[proc]; i<owners[proc+1]; i++) {
4747       anzi = ai[i+1] - ai[i];
4748       if (anzi) {
4749         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4750         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4751         nrows++;
4752       }
4753     }
4754     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4755     k++;
4756     buf_si += len_si[proc];
4757   }
4758 
4759   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4760   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4761 
4762   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4763   for (i=0; i<merge->nrecv; i++) {
4764     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4765   }
4766 
4767   ierr = PetscFree(len_si);CHKERRQ(ierr);
4768   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4769   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4770   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4771   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4772   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4773   ierr = PetscFree(status);CHKERRQ(ierr);
4774 
4775   /* compute a local seq matrix in each processor */
4776   /*----------------------------------------------*/
4777   /* allocate bi array and free space for accumulating nonzero column info */
4778   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4779   bi[0] = 0;
4780 
4781   /* create and initialize a linked list */
4782   nlnk = N+1;
4783   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4784 
4785   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4786   len  = ai[owners[rank+1]] - ai[owners[rank]];
4787   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4788 
4789   current_space = free_space;
4790 
4791   /* determine symbolic info for each local row */
4792   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4793 
4794   for (k=0; k<merge->nrecv; k++) {
4795     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4796     nrows       = *buf_ri_k[k];
4797     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4798     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4799   }
4800 
4801   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4802   len  = 0;
4803   for (i=0; i<m; i++) {
4804     bnzi = 0;
4805     /* add local non-zero cols of this proc's seqmat into lnk */
4806     arow  = owners[rank] + i;
4807     anzi  = ai[arow+1] - ai[arow];
4808     aj    = a->j + ai[arow];
4809     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4810     bnzi += nlnk;
4811     /* add received col data into lnk */
4812     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4813       if (i == *nextrow[k]) { /* i-th row */
4814         anzi  = *(nextai[k]+1) - *nextai[k];
4815         aj    = buf_rj[k] + *nextai[k];
4816         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4817         bnzi += nlnk;
4818         nextrow[k]++; nextai[k]++;
4819       }
4820     }
4821     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4822 
4823     /* if free space is not available, make more free space */
4824     if (current_space->local_remaining<bnzi) {
4825       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4826       nspacedouble++;
4827     }
4828     /* copy data into free space, then initialize lnk */
4829     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4830     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4831 
4832     current_space->array           += bnzi;
4833     current_space->local_used      += bnzi;
4834     current_space->local_remaining -= bnzi;
4835 
4836     bi[i+1] = bi[i] + bnzi;
4837   }
4838 
4839   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4840 
4841   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4842   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4843   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4844 
4845   /* create symbolic parallel matrix B_mpi */
4846   /*---------------------------------------*/
4847   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4848   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4849   if (n==PETSC_DECIDE) {
4850     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4851   } else {
4852     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4853   }
4854   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4855   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4856   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4857   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4858   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4859 
4860   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4861   B_mpi->assembled  = PETSC_FALSE;
4862   merge->bi         = bi;
4863   merge->bj         = bj;
4864   merge->buf_ri     = buf_ri;
4865   merge->buf_rj     = buf_rj;
4866   merge->coi        = NULL;
4867   merge->coj        = NULL;
4868   merge->owners_co  = NULL;
4869 
4870   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4871 
4872   /* attach the supporting struct to B_mpi for reuse */
4873   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4874   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4875   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4876   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4877   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4878   *mpimat = B_mpi;
4879 
4880   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4881   PetscFunctionReturn(0);
4882 }
4883 
4884 /*@C
4885       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4886                  matrices from each processor
4887 
4888     Collective
4889 
4890    Input Parameters:
4891 +    comm - the communicator the parallel matrix will live on
4892 .    seqmat - the input sequential matrix (one per process)
4893 .    m - number of local rows (or PETSC_DECIDE)
4894 .    n - number of local columns (or PETSC_DECIDE)
4895 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4896 
4897    Output Parameter:
4898 .    mpimat - the parallel matrix generated
4899 
4900     Level: advanced
4901 
4902    Notes:
4903      The dimensions of the sequential matrix in each processor MUST be the same.
4904      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4905      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
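
     A typical call/reuse pattern (a sketch):
.vb
       MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
       /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
       MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve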
4906 @*/
4907 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4908 {
4909   PetscErrorCode ierr;
4910   PetscMPIInt    size;
4911 
4912   PetscFunctionBegin;
4913   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4914   if (size == 1) {
4915     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4916     if (scall == MAT_INITIAL_MATRIX) {
4917       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4918     } else {
4919       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4920     }
4921     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4922     PetscFunctionReturn(0);
4923   }
4924   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925   if (scall == MAT_INITIAL_MATRIX) {
4926     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4927   }
4928   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4929   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 /*@
4934      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4935           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4936           with MatGetSize().
4937 
4938     Not Collective
4939 
4940    Input Parameters:
4941 +    A - the matrix
4942 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4943 
4944    Output Parameter:
4945 .    A_loc - the local sequential matrix generated
4946 
4947     Level: developer
4948 
4949    Notes:
4950      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4951      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4952      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4953      modify the values of the returned A_loc.
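
     Typical usage (a sketch):
.vb
       Mat A_loc;
       MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
       /* ... use A_loc; after the values of A change ... */
       MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
       MatDestroy(&A_loc);
.ve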
4954 
4955 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4956 
4957 @*/
4958 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4959 {
4960   PetscErrorCode ierr;
4961   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4962   Mat_SeqAIJ     *mat,*a,*b;
4963   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4964   MatScalar      *aa,*ba,*cam;
4965   PetscScalar    *ca;
4966   PetscMPIInt    size;
4967   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4968   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4969   PetscBool      match;
4970 
4971   PetscFunctionBegin;
4972   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4973   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4974   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4975   if (size == 1) {
4976     if (scall == MAT_INITIAL_MATRIX) {
4977       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4978       *A_loc = mpimat->A;
4979     } else if (scall == MAT_REUSE_MATRIX) {
4980       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4981     }
4982     PetscFunctionReturn(0);
4983   }
4984 
4985   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4986   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4987   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4988   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4989   aa = a->a; ba = b->a;
4990   if (scall == MAT_INITIAL_MATRIX) {
4991     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4992     ci[0] = 0;
4993     for (i=0; i<am; i++) {
4994       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4995     }
4996     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4997     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4998     k    = 0;
4999     for (i=0; i<am; i++) {
5000       ncols_o = bi[i+1] - bi[i];
5001       ncols_d = ai[i+1] - ai[i];
5002       /* off-diagonal portion of A */
5003       for (jo=0; jo<ncols_o; jo++) {
5004         col = cmap[*bj];
5005         if (col >= cstart) break;
5006         cj[k]   = col; bj++;
5007         ca[k++] = *ba++;
5008       }
5009       /* diagonal portion of A */
5010       for (j=0; j<ncols_d; j++) {
5011         cj[k]   = cstart + *aj++;
5012         ca[k++] = *aa++;
5013       }
5014       /* off-diagonal portion of A */
5015       for (j=jo; j<ncols_o; j++) {
5016         cj[k]   = cmap[*bj++];
5017         ca[k++] = *ba++;
5018       }
5019     }
5020     /* put together the new matrix */
5021     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5022     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5023     /* Since these are PETSc arrays, change flags to free them as necessary. */
5024     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5025     mat->free_a  = PETSC_TRUE;
5026     mat->free_ij = PETSC_TRUE;
5027     mat->nonew   = 0;
5028   } else if (scall == MAT_REUSE_MATRIX) {
5029     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5030     ci = mat->i; cj = mat->j; cam = mat->a;
5031     for (i=0; i<am; i++) {
5032       /* off-diagonal portion of A */
5033       ncols_o = bi[i+1] - bi[i];
5034       for (jo=0; jo<ncols_o; jo++) {
5035         col = cmap[*bj];
5036         if (col >= cstart) break;
5037         *cam++ = *ba++; bj++;
5038       }
5039       /* diagonal portion of A */
5040       ncols_d = ai[i+1] - ai[i];
5041       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5042       /* off-diagonal portion of A */
5043       for (j=jo; j<ncols_o; j++) {
5044         *cam++ = *ba++; bj++;
5045       }
5046     }
5047   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5048   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5049   PetscFunctionReturn(0);
5050 }
5051 
5052 /*@C
5053      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5054 
5055     Not Collective
5056 
5057    Input Parameters:
5058 +    A - the matrix
5059 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5060 -    row, col - index sets of rows and columns to extract (or NULL)
5061 
5062    Output Parameter:
5063 .    A_loc - the local sequential matrix generated
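5063a
   Notes:
     A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix; passing NULL for row and col extracts all local rows and the nonzero columns):
.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
      /* ... */
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve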
5064 
5065     Level: developer
5066 
5067 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5068 
5069 @*/
5070 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5071 {
5072   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5073   PetscErrorCode ierr;
5074   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5075   IS             isrowa,iscola;
5076   Mat            *aloc;
5077   PetscBool      match;
5078 
5079   PetscFunctionBegin;
5080   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5081   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5082   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5083   if (!row) {
5084     start = A->rmap->rstart; end = A->rmap->rend;
5085     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5086   } else {
5087     isrowa = *row;
5088   }
5089   if (!col) {
5090     start = A->cmap->rstart;
5091     cmap  = a->garray;
5092     nzA   = a->A->cmap->n;
5093     nzB   = a->B->cmap->n;
5094     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5095     ncols = 0;
5096     for (i=0; i<nzB; i++) {
5097       if (cmap[i] < start) idx[ncols++] = cmap[i];
5098       else break;
5099     }
5100     imark = i;
5101     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5102     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5103     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5104   } else {
5105     iscola = *col;
5106   }
5107   if (scall != MAT_INITIAL_MATRIX) {
5108     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5109     aloc[0] = *A_loc;
5110   }
5111   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5112   if (!col) { /* attach global id of condensed columns */
5113     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5114   }
5115   *A_loc = aloc[0];
5116   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5117   if (!row) {
5118     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5119   }
5120   if (!col) {
5121     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5122   }
5123   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5124   PetscFunctionReturn(0);
5125 }
5126 
5127 /*
5128  * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
5129  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5130  * on a global size.
5131  * */
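/*
 * A rough outline of the algorithm below, for orientation:
 *   1) for each requested row, find its owner and local index with PetscLayoutFindOwnerIndex();
 *   2) use a PetscSF to fetch the number of nonzeros (and their offsets) of each requested row;
 *   3) preallocate the sequential matrix P_oth with those row lengths;
 *   4) build two further SFs (one for the diagonal block, one for the off-diagonal block) and
 *      broadcast the j and a arrays of P directly into the storage of P_oth.
 * */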
5132 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5133 {
5134   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5135   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5136   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5137   PetscMPIInt              owner;
5138   PetscSFNode              *iremote,*oiremote;
5139   const PetscInt           *lrowindices;
5140   PetscErrorCode           ierr;
5141   PetscSF                  sf,osf;
5142   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5143   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5144   MPI_Comm                 comm;
5145   ISLocalToGlobalMapping   mapping;
5146 
5147   PetscFunctionBegin;
5148   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5149   /* plocalsize is the number of roots
5150    * nrows is the number of leaves
5151    * */
5152   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5153   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5154   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5155   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5156   for (i=0;i<nrows;i++) {
5157     /* Find a remote index and an owner for a row
5158      * The row could be local or remote
5159      * */
5160     owner = 0;
5161     lidx  = 0;
5162     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5163     iremote[i].index = lidx;
5164     iremote[i].rank  = owner;
5165   }
5166   /* Create SF to communicate how many nonzero columns for each row */
5167   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5168    * SF will figure out the number of nonzero columns for each row, and their
5169    * offsets
5170    * */
5171   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5172   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5173   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5174 
5175   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5176   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5177   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5178   roffsets[0] = 0;
5179   roffsets[1] = 0;
5180   for (i=0;i<plocalsize;i++) {
5181     /* diag */
5182     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5183     /* off diag */
5184     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5185     /* compute offsets so that we know the relative location of each row */
5186     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5187     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5188   }
5189   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5190   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5191   /* 'r' means root, and 'l' means leaf */
5192   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5193   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5194   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5195   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5196   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5197   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5198   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5199   dntotalcols = 0;
5200   ontotalcols = 0;
5201   ncol = 0;
5202   for (i=0;i<nrows;i++) {
5203     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5204     ncol = PetscMax(pnnz[i],ncol);
5205     /* diag */
5206     dntotalcols += nlcols[i*2+0];
5207     /* off diag */
5208     ontotalcols += nlcols[i*2+1];
5209   }
5210   /* We do not need to figure out the exact number of columns
5211    * since all the calculations will be done by going through the raw data
5212    * */
5213   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5214   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5215   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5216   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5217   /* diag */
5218   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5219   /* off diag */
5220   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5221   /* diag */
5222   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5223   /* off diag */
5224   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5225   dntotalcols = 0;
5226   ontotalcols = 0;
5227   ntotalcols  = 0;
5228   for (i=0;i<nrows;i++) {
5229     owner = 0;
5230     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5231     /* Set iremote for diag matrix */
5232     for (j=0;j<nlcols[i*2+0];j++) {
5233       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5234       iremote[dntotalcols].rank    = owner;
5235       /* P_oth is SeqAIJ, so ilocal needs to point into the first part of the memory */
5236       ilocal[dntotalcols++]        = ntotalcols++;
5237     }
5238     /* off diag */
5239     for (j=0;j<nlcols[i*2+1];j++) {
5240       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5241       oiremote[ontotalcols].rank    = owner;
5242       oilocal[ontotalcols++]        = ntotalcols++;
5243     }
5244   }
5245   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5246   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5247   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5248   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5249   /* P serves as the roots and P_oth as the leaves
5250    * Diag matrix
5251    * */
5252   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5253   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5254   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5255 
5256   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5257   /* Off diag */
5258   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5259   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5260   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5261   /* We operate on the matrix's internal data to save memory */
5262   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5263   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5264   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5265   /* Convert to global indices for diag matrix */
5266   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5267   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5268   /* We want P_oth to store global indices */
5269   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5270   /* Use memory scalable approach */
5271   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5272   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5273   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5274   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5275   /* Convert back to local indices */
5276   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5277   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5278   nout = 0;
5279   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5280   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5281   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5282   /* Exchange values */
5283   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5284   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5285   /* Stop PETSc from shrinking memory */
5286   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5287   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5288   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5289   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5290   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5291   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5292   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5293   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5294   PetscFunctionReturn(0);
5295 }
5296 
5297 /*
5298  * Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero columns of local A.
5299  * This supports MPIAIJ and MAIJ.
5300  * */
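/*
 * Roughly: on MAT_INITIAL_MATRIX, hash the off-diagonal column indices of A (divided by dof) to obtain
 * the unique set of rows of P that are needed, extract them with MatCreateSeqSubMatrixWithRows_Private(),
 * and cache the SFs composed on P_oth; on MAT_REUSE_MATRIX, only the numerical values are re-broadcast.
 * */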
5301 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5302 {
5303   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5304   Mat_SeqAIJ            *p_oth;
5305   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5306   IS                    rows,map;
5307   PetscHMapI            hamp;
5308   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5309   MPI_Comm              comm;
5310   PetscSF               sf,osf;
5311   PetscBool             has;
5312   PetscErrorCode        ierr;
5313 
5314   PetscFunctionBegin;
5315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5316   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5317   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5318    *  and then create a submatrix (that often is an overlapping matrix)
5319    * */
5320   if (reuse == MAT_INITIAL_MATRIX) {
5321     /* Use a hash table to figure out unique keys */
5322     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5323     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5324     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5325     count = 0;
5326     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5327     for (i=0;i<a->B->cmap->n;i++) {
5328       key  = a->garray[i]/dof;
5329       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5330       if (!has) {
5331         mapping[i] = count;
5332         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5333       } else {
5334         /* Current 'i' has the same key as the previous step */
5335         mapping[i] = count-1;
5336       }
5337     }
5338     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5339     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5340     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5341     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5342     off = 0;
5343     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5344     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5345     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5346     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5347     /* In case the matrix was already created and the user wants to recreate it */
5348     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5349     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5350     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5351     ierr = ISDestroy(&map);CHKERRQ(ierr);
5352     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5353   } else if (reuse == MAT_REUSE_MATRIX) {
5354     /* If the matrix was already created, we simply update the values using the SF objects
5355      * that were attached to the matrix earlier.
5356      *  */
5357     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5358     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5359     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5360     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5361     /* Update values in place */
5362     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5363     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5364     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5365     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5366   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5367   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5368   PetscFunctionReturn(0);
5369 }
5370 
5371 /*@C
5372     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of local A
5373 
5374     Collective on Mat
5375 
5376    Input Parameters:
5377 +    A,B - the matrices in mpiaij format
5378 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5379 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5380 
5381    Output Parameter:
5382 +    rowb, colb - index sets of rows and columns of B to extract
5383 -    B_seq - the sequential matrix generated
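5383a
    Notes:
     A minimal usage sketch (assuming A and B are compatible assembled MATMPIAIJ matrices):
.vb
      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      /* ... later, refresh the values, reusing the index sets returned above ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve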
5384 
5385     Level: developer
5386 
5387 @*/
5388 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5389 {
5390   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5391   PetscErrorCode ierr;
5392   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5393   IS             isrowb,iscolb;
5394   Mat            *bseq=NULL;
5395 
5396   PetscFunctionBegin;
5397   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5398     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5399   }
5400   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5401 
5402   if (scall == MAT_INITIAL_MATRIX) {
5403     start = A->cmap->rstart;
5404     cmap  = a->garray;
5405     nzA   = a->A->cmap->n;
5406     nzB   = a->B->cmap->n;
5407     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5408     ncols = 0;
5409     for (i=0; i<nzB; i++) {  /* row < local row index */
5410       if (cmap[i] < start) idx[ncols++] = cmap[i];
5411       else break;
5412     }
5413     imark = i;
5414     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5415     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5416     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5417     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5418   } else {
5419     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5420     isrowb  = *rowb; iscolb = *colb;
5421     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5422     bseq[0] = *B_seq;
5423   }
5424   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5425   *B_seq = bseq[0];
5426   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5427   if (!rowb) {
5428     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5429   } else {
5430     *rowb = isrowb;
5431   }
5432   if (!colb) {
5433     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5434   } else {
5435     *colb = iscolb;
5436   }
5437   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5438   PetscFunctionReturn(0);
5439 }
5440 
5441 /*
5442     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns
5443     of the OFF-DIAGONAL portion of local A
5444 
5445     Collective on Mat
5446 
5447    Input Parameters:
5448 +    A,B - the matrices in mpiaij format
5449 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5450 
5451    Output Parameter:
5452 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5453 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5454 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5455 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5456 
5457     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5458      for this matrix. This is not desirable.
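5458a
     Rough outline (as implemented below): the row lengths (i-array), column indices (j-array) and values
     (a-array) of the needed rows of B are exchanged in three successive rounds of nonblocking point-to-point
     messages, using the send/receive lists obtained from the VecScatter; with MAT_REUSE_MATRIX only the
     a-array round is repeated, using the buffers saved from the initial call.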
5459 
5460     Level: developer
5461 
5462 */
5463 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5464 {
5465   PetscErrorCode         ierr;
5466   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5467   Mat_SeqAIJ             *b_oth;
5468   VecScatter             ctx;
5469   MPI_Comm               comm;
5470   const PetscMPIInt      *rprocs,*sprocs;
5471   const PetscInt         *srow,*rstarts,*sstarts;
5472   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5473   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5474   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5475   MPI_Request            *rwaits = NULL,*swaits = NULL;
5476   MPI_Status             rstatus;
5477   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5478 
5479   PetscFunctionBegin;
5480   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5481   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5482 
5483   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5484     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5485   }
5486   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5487   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5488 
5489   if (size == 1) {
5490     if (startsj_s) *startsj_s = NULL;
5491     if (bufa_ptr)  *bufa_ptr  = NULL;
5492     *B_oth    = NULL;
5493     PetscFunctionReturn(0);
5494   }
5495 
5496   ctx = a->Mvctx;
5497   tag = ((PetscObject)ctx)->tag;
5498 
5499   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5500   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5501   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5502   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5503   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5504   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5505   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5506 
5507   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5508   if (scall == MAT_INITIAL_MATRIX) {
5509     /* i-array */
5510     /*---------*/
5511     /*  post receives */
5512     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5513     for (i=0; i<nrecvs; i++) {
5514       rowlen = rvalues + rstarts[i]*rbs;
5515       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5516       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5517     }
5518 
5519     /* pack the outgoing message */
5520     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5521 
5522     sstartsj[0] = 0;
5523     rstartsj[0] = 0;
5524     len         = 0; /* total length of j or a array to be sent */
5525     if (nsends) {
5526       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5527       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5528     }
5529     for (i=0; i<nsends; i++) {
5530       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5531       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5532       for (j=0; j<nrows; j++) {
5533         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5534         for (l=0; l<sbs; l++) {
5535           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5536 
5537           rowlen[j*sbs+l] = ncols;
5538 
5539           len += ncols;
5540           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5541         }
5542         k++;
5543       }
5544       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5545 
5546       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5547     }
5548     /* recvs and sends of i-array are completed */
5549     i = nrecvs;
5550     while (i--) {
5551       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5552     }
5553     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5554     ierr = PetscFree(svalues);CHKERRQ(ierr);
5555 
5556     /* allocate buffers for sending j and a arrays */
5557     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5558     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5559 
5560     /* create i-array of B_oth */
5561     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5562 
5563     b_othi[0] = 0;
5564     len       = 0; /* total length of j or a array to be received */
5565     k         = 0;
5566     for (i=0; i<nrecvs; i++) {
5567       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5568       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5569       for (j=0; j<nrows; j++) {
5570         b_othi[k+1] = b_othi[k] + rowlen[j];
5571         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5572         k++;
5573       }
5574       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5575     }
5576     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5577 
5578     /* allocate space for the j and a arrays of B_oth */
5579     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5580     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5581 
5582     /* j-array */
5583     /*---------*/
5584     /*  post receives of j-array */
5585     for (i=0; i<nrecvs; i++) {
5586       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5587       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5588     }
5589 
5590     /* pack the outgoing message j-array */
5591     if (nsends) k = sstarts[0];
5592     for (i=0; i<nsends; i++) {
5593       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5594       bufJ  = bufj+sstartsj[i];
5595       for (j=0; j<nrows; j++) {
5596         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5597         for (ll=0; ll<sbs; ll++) {
5598           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5599           for (l=0; l<ncols; l++) {
5600             *bufJ++ = cols[l];
5601           }
5602           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5603         }
5604       }
5605       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5606     }
5607 
5608     /* recvs and sends of j-array are completed */
5609     i = nrecvs;
5610     while (i--) {
5611       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5612     }
5613     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5614   } else if (scall == MAT_REUSE_MATRIX) {
5615     sstartsj = *startsj_s;
5616     rstartsj = *startsj_r;
5617     bufa     = *bufa_ptr;
5618     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5619     b_otha   = b_oth->a;
5620   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unknown MatReuse type");
5621 
5622   /* a-array */
5623   /*---------*/
5624   /*  post receives of a-array */
5625   for (i=0; i<nrecvs; i++) {
5626     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5627     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5628   }
5629 
5630   /* pack the outgoing message a-array */
5631   if (nsends) k = sstarts[0];
5632   for (i=0; i<nsends; i++) {
5633     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5634     bufA  = bufa+sstartsj[i];
5635     for (j=0; j<nrows; j++) {
5636       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5637       for (ll=0; ll<sbs; ll++) {
5638         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5639         for (l=0; l<ncols; l++) {
5640           *bufA++ = vals[l];
5641         }
5642         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5643       }
5644     }
5645     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5646   }
5647   /* recvs and sends of a-array are completed */
5648   i = nrecvs;
5649   while (i--) {
5650     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5651   }
5652   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5653   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5654 
5655   if (scall == MAT_INITIAL_MATRIX) {
5656     /* put together the new matrix */
5657     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5658 
5659     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5660     /* Since these are PETSc arrays, change flags to free them as necessary. */
5661     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5662     b_oth->free_a  = PETSC_TRUE;
5663     b_oth->free_ij = PETSC_TRUE;
5664     b_oth->nonew   = 0;
5665 
5666     ierr = PetscFree(bufj);CHKERRQ(ierr);
5667     if (!startsj_s || !bufa_ptr) {
5668       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5669       ierr = PetscFree(bufa);CHKERRQ(ierr);
5670     } else {
5671       *startsj_s = sstartsj;
5672       *startsj_r = rstartsj;
5673       *bufa_ptr  = bufa;
5674     }
5675   }
5676 
5677   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5678   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5679   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5680   PetscFunctionReturn(0);
5681 }
5682 
5683 /*@C
5684   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5685 
5686   Not Collective
5687 
5688   Input Parameter:
5689 . A - The matrix in mpiaij format
5690 
5691   Output Parameter:
5692 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5693 . colmap - A map from global column index to local index into lvec
5694 - multScatter - A scatter from the argument of a matrix-vector product to lvec
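5694a
  Notes:
  A minimal usage sketch (assuming A is a MATMPIAIJ matrix; the type of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE):
.vb
      Vec        lvec;
      VecScatter scatter;
#if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
#else
      PetscInt   *colmap;
#endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
.ve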
5695 
5696   Level: developer
5697 
5698 @*/
5699 #if defined(PETSC_USE_CTABLE)
5700 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5701 #else
5702 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5703 #endif
5704 {
5705   Mat_MPIAIJ *a;
5706 
5707   PetscFunctionBegin;
5708   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5709   PetscValidPointer(lvec, 2);
5710   PetscValidPointer(colmap, 3);
5711   PetscValidPointer(multScatter, 4);
5712   a = (Mat_MPIAIJ*) A->data;
5713   if (lvec) *lvec = a->lvec;
5714   if (colmap) *colmap = a->colmap;
5715   if (multScatter) *multScatter = a->Mvctx;
5716   PetscFunctionReturn(0);
5717 }
5718 
5719 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5720 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5721 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5722 #if defined(PETSC_HAVE_MKL_SPARSE)
5723 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5724 #endif
5725 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5726 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5727 #if defined(PETSC_HAVE_ELEMENTAL)
5728 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5729 #endif
5730 #if defined(PETSC_HAVE_SCALAPACK)
5731 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5732 #endif
5733 #if defined(PETSC_HAVE_HYPRE)
5734 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5735 #endif
5736 #if defined(PETSC_HAVE_CUDA)
5737 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5738 #endif
5739 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5740 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5741 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5742 
5743 /*
5744     Computes (B'*A')' since computing A*B directly is untenable
5745 
5746                n                       p                          p
5747         [             ]       [             ]         [                 ]
5748       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5749         [             ]       [             ]         [                 ]
5750 
5751 */
5752 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5753 {
5754   PetscErrorCode ierr;
5755   Mat            At,Bt,Ct;
5756 
5757   PetscFunctionBegin;
5758   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5759   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5760   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5761   ierr = MatDestroy(&At);CHKERRQ(ierr);
5762   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5763   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5764   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5765   PetscFunctionReturn(0);
5766 }
5767 
5768 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5769 {
5770   PetscErrorCode ierr;
5771   PetscBool      cisdense;
5772 
5773   PetscFunctionBegin;
5774   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5775   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5776   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5777   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5778   if (!cisdense) {
5779     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5780   }
5781   ierr = MatSetUp(C);CHKERRQ(ierr);
5782 
5783   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5784   PetscFunctionReturn(0);
5785 }
5786 
5787 /* ----------------------------------------------------------------*/
5788 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5789 {
5790   Mat_Product *product = C->product;
5791   Mat         A = product->A,B=product->B;
5792 
5793   PetscFunctionBegin;
5794   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5795     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5796 
5797   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5798   C->ops->productsymbolic = MatProductSymbolic_AB;
5799   PetscFunctionReturn(0);
5800 }
5801 
5802 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5803 {
5804   PetscErrorCode ierr;
5805   Mat_Product    *product = C->product;
5806 
5807   PetscFunctionBegin;
5808   if (product->type == MATPRODUCT_AB) {
5809     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5810   }
5811   PetscFunctionReturn(0);
5812 }
5813 /* ----------------------------------------------------------------*/
5814 
5815 /*MC
5816    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5817 
5818    Options Database Keys:
5819 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5820 
5821    Level: beginner
5822 
5823    Notes:
5824     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5825     in this case the values associated with the rows and columns one passes in are set to zero
5826     in the matrix
5827 
5828     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5829     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
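5829a
    A minimal creation sketch (dnz and onz are assumed upper bounds for the number of diagonal and off-diagonal nonzeros per row):
.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);
.ve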
5830 
5831 .seealso: MatCreateAIJ()
5832 M*/
5833 
5834 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5835 {
5836   Mat_MPIAIJ     *b;
5837   PetscErrorCode ierr;
5838   PetscMPIInt    size;
5839 
5840   PetscFunctionBegin;
5841   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5842 
5843   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5844   B->data       = (void*)b;
5845   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5846   B->assembled  = PETSC_FALSE;
5847   B->insertmode = NOT_SET_VALUES;
5848   b->size       = size;
5849 
5850   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5851 
5852   /* build cache for off array entries formed */
5853   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5854 
5855   b->donotstash  = PETSC_FALSE;
5856   b->colmap      = NULL;
5857   b->garray      = NULL;
5858   b->roworiented = PETSC_TRUE;
5859 
5860   /* stuff used for matrix vector multiply */
5861   b->lvec  = NULL;
5862   b->Mvctx = NULL;
5863 
5864   /* stuff for MatGetRow() */
5865   b->rowindices   = NULL;
5866   b->rowvalues    = NULL;
5867   b->getrowactive = PETSC_FALSE;
5868 
5869   /* flexible pointer used in CUSP/CUSPARSE classes */
5870   b->spptr = NULL;
5871 
5872   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5873   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5874   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5875   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5876   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5877   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5878   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5879   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5880   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5881   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5882 #if defined(PETSC_HAVE_MKL_SPARSE)
5883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5884 #endif
5885   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5887   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5888 #if defined(PETSC_HAVE_ELEMENTAL)
5889   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5890 #endif
5891 #if defined(PETSC_HAVE_SCALAPACK)
5892   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
5893 #endif
5894   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5895   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5896 #if defined(PETSC_HAVE_HYPRE)
5897   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5898   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5899 #endif
5900   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5901   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5902   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5903   PetscFunctionReturn(0);
5904 }
5905 
5906 /*@C
5907      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5908          and "off-diagonal" part of the matrix in CSR format.
5909 
5910    Collective
5911 
5912    Input Parameters:
5913 +  comm - MPI communicator
5914 .  m - number of local rows (Cannot be PETSC_DECIDE)
5915 .  n - number of local columns; this value should be the same as the local size used in creating the
5916        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5917        calculated if N is given). For square matrices n is almost always m.
5918 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5919 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5920 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5921 .   j - column indices
5922 .   a - matrix values
5923 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5924 .   oj - column indices
5925 -   oa - matrix values
5926 
5927    Output Parameter:
5928 .   mat - the matrix
5929 
5930    Level: advanced
5931 
5932    Notes:
5933        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5934        must free the arrays once the matrix has been destroyed and not before.
5935 
5936        The i and j indices are 0 based
5937 
5938        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5939 
5940        This sets local rows and cannot be used to set off-processor values.
5941 
5942        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5943        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5944        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5945        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5946        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5947        communication if it is known that only local entries will be set.
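5947a
       For illustration, a minimal two-rank sketch (assuming exactly two MPI ranks, each owning one row and
       one column of the global matrix [1 2; 4 3]; j holds column indices local to the diagonal block while
       oj holds global column indices, and the arrays must remain valid until the matrix is destroyed):
.vb
      PetscInt    i[2]  = {0,1},j[1],oi[2] = {0,1},oj[1];
      PetscScalar a[1],oa[1];
      if (!rank) {j[0] = 0; a[0] = 1.0; oj[0] = 1; oa[0] = 2.0;} /* row 0: diagonal (0,0)=1, off-diagonal (0,1)=2 */
      else       {j[0] = 0; a[0] = 3.0; oj[0] = 0; oa[0] = 4.0;} /* row 1: diagonal (1,1)=3, off-diagonal (1,0)=4 */
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve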
5948 
5949 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5950           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5951 @*/
5952 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5953 {
5954   PetscErrorCode ierr;
5955   Mat_MPIAIJ     *maij;
5956 
5957   PetscFunctionBegin;
5958   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5959   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5960   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5961   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5962   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5963   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5964   maij = (Mat_MPIAIJ*) (*mat)->data;
5965 
5966   (*mat)->preallocated = PETSC_TRUE;
5967 
5968   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5969   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5970 
5971   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5972   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5973 
5974   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5975   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5976   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5977   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5978 
5979   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5980   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5981   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5982   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5983   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5984   PetscFunctionReturn(0);
5985 }
5986 
5987 /*
5988     Special version for direct calls from Fortran
5989 */
5990 #include <petsc/private/fortranimpl.h>
5991 
5992 /* Change these macros so they can be used in a void function */
5993 #undef CHKERRQ
5994 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5995 #undef SETERRQ2
5996 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5997 #undef SETERRQ3
5998 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5999 #undef SETERRQ
6000 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6001 
6002 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6003 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6004 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6005 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6006 #else
6007 #endif
6008 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6009 {
6010   Mat            mat  = *mmat;
6011   PetscInt       m    = *mm, n = *mn;
6012   InsertMode     addv = *maddv;
6013   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6014   PetscScalar    value;
6015   PetscErrorCode ierr;
6016 
6017   MatCheckPreallocated(mat,1);
6018   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6019   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6020   {
6021     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6022     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6023     PetscBool roworiented = aij->roworiented;
6024 
6025     /* Some Variables required in the macro */
6026     Mat        A                    = aij->A;
6027     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6028     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6029     MatScalar  *aa                  = a->a;
6030     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6031     Mat        B                    = aij->B;
6032     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6033     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6034     MatScalar  *ba                  = b->a;
6035     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6036      * cannot use "#if defined" inside a macro. */
6037     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6038 
6039     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6040     PetscInt  nonew = a->nonew;
6041     MatScalar *ap1,*ap2;
6042 
6043     PetscFunctionBegin;
6044     for (i=0; i<m; i++) {
6045       if (im[i] < 0) continue;
6046       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6047       if (im[i] >= rstart && im[i] < rend) {
6048         row      = im[i] - rstart;
6049         lastcol1 = -1;
6050         rp1      = aj + ai[row];
6051         ap1      = aa + ai[row];
6052         rmax1    = aimax[row];
6053         nrow1    = ailen[row];
6054         low1     = 0;
6055         high1    = nrow1;
6056         lastcol2 = -1;
6057         rp2      = bj + bi[row];
6058         ap2      = ba + bi[row];
6059         rmax2    = bimax[row];
6060         nrow2    = bilen[row];
6061         low2     = 0;
6062         high2    = nrow2;
6063 
6064         for (j=0; j<n; j++) {
6065           if (roworiented) value = v[i*n+j];
6066           else value = v[i+j*m];
6067           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6068           if (in[j] >= cstart && in[j] < cend) {
6069             col = in[j] - cstart;
6070             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6071 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6072             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6073 #endif
6074           } else if (in[j] < 0) continue;
6075           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6076             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6077             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6078           } else {
6079             if (mat->was_assembled) {
6080               if (!aij->colmap) {
6081                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6082               }
6083 #if defined(PETSC_USE_CTABLE)
6084               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6085               col--;
6086 #else
6087               col = aij->colmap[in[j]] - 1;
6088 #endif
6089               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6090                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6091                 col  =  in[j];
6092                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6093                 B        = aij->B;
6094                 b        = (Mat_SeqAIJ*)B->data;
6095                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6096                 rp2      = bj + bi[row];
6097                 ap2      = ba + bi[row];
6098                 rmax2    = bimax[row];
6099                 nrow2    = bilen[row];
6100                 low2     = 0;
6101                 high2    = nrow2;
6102                 bm       = aij->B->rmap->n;
6103                 ba       = b->a;
6104                 inserted = PETSC_FALSE;
6105               }
6106             } else col = in[j];
6107             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6108 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6109             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6110 #endif
6111           }
6112         }
6113       } else if (!aij->donotstash) {
6114         if (roworiented) {
6115           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6116         } else {
6117           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6118         }
6119       }
6120     }
6121   }
6122   PetscFunctionReturnVoid();
6123 }
6124