/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f2ed2dc71a2ab9ffda85eae8afa0cbea9ed570de) */
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The matrix type also
23    automatically switches over to use inodes when enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
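/*
   Illustrative usage sketch (added, not from the original source): creating a MATAIJ matrix and
   calling both preallocation routines as recommended above.  The sizes and per-row counts are
   placeholder values chosen only for the example.

     Mat      A;
     PetscInt m = 8;                                       local number of rows and columns (assumed)
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);                  used on a single-process communicator
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);           used on a multi-process communicator
     insert entries with MatSetValues(), then call MatAssemblyBegin()/MatAssemblyEnd()
*/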
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
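/*
   Illustrative usage sketch (added, not from the original source): selecting the AIJCRL format at
   runtime through the options database, by running the program with -mat_type aijcrl.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);     placeholder global sizes
     MatSetFromOptions(A);                                 honors -mat_type aijcrl
*/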
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
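/*
   Illustrative call sequence (added sketch; the variable names are placeholders): rank 0 holds the
   complete sequential matrix seqmat and every rank requests mlocal of its rows.

     Mat dist;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,seqmat,mlocal,MAT_INITIAL_MATRIX,&dist);
     if the nonzero pattern is unchanged, a later call moves only the numerical values:
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,seqmat,mlocal,MAT_REUSE_MATRIX,&dist);
*/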
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to the other processes */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal entries in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal entries in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
424 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
425 has an integer array of length N, the global number of columns), but access is fast.
426 */
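/*
   Worked example (added for clarity; the numbers are hypothetical): if the off-diagonal block B has
   garray = {3, 7, 12}, i.e. its local columns 0, 1, 2 correspond to global columns 3, 7 and 12, then
   the array variant below yields colmap[3] = 1, colmap[7] = 2, colmap[12] = 3 and 0 elsewhere; the
   values are stored 1-based so that 0 means "global column not present on this process".  The
   PETSC_USE_CTABLE variant stores the same mapping in a hash table keyed by the global column plus 1.
*/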
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
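/*
   Descriptive note (added): the two macros below insert or add the value at (row,col) of the local
   diagonal block A (first macro) or off-diagonal block B (second macro).  They locate the column in
   the sorted row (bisection followed by a short linear scan), reallocate the row with
   MatSeqXAIJReallocateAIJ() when a new nonzero must be created and nonew permits it, and shift the
   later entries of the row up by one slot to make room.
*/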
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
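/*
   Descriptive note (added): v[] must contain the complete locally owned row in ascending global
   column order; the entries left of the diagonal block are copied into B, the diagonal-block entries
   into A, and the remaining entries back into B, matching the three copies performed below.
*/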
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
671 */
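/*
   Worked example (added for clarity; the numbers are hypothetical): with cstart = 4 and cend = 8, a
   row whose sorted global columns are {1, 5, 7, 9} is split so that the diagonal part receives the
   local columns {5-4, 7-4} = {1, 3} (ailen = 2) and the off-diagonal part receives the global
   columns {1, 9} (bilen = 2).
*/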
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n; ) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any process has disassembled; if so we must
857      also disassemble ourselves, in order that we may reassemble */
858   /*
859      if the nonzero structure of the submatrix B cannot change then we know that
860      no process disassembled, and thus we can skip this step
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = 0;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
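/*
   Descriptive note (added): zeroes the given global rows and the matching columns.  A PetscSF maps
   the caller-supplied (possibly off-process) row indices to their owners; the diagonal block is
   handled by MatZeroRowsColumns() on l->A, while for the off-diagonal block a mask vector marking
   the zeroed rows is scattered to the ghost columns of every process so that the corresponding
   columns of B can be zeroed (and, when x and b are supplied, b is corrected for the eliminated
   entries).
*/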
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layout don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of the off-diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
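/*
   Descriptive note (added): starts the scatter of the needed off-process entries of xx into a->lvec,
   performs the local diagonal-block product to overlap communication with computation, completes the
   scatter, and finally adds the off-diagonal contribution B*lvec to yy.
*/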
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1162 {
1163   MPI_Comm       comm;
1164   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1165   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1166   IS             Me,Notme;
1167   PetscErrorCode ierr;
1168   PetscInt       M,N,first,last,*notme,i;
1169   PetscBool      lf;
1170   PetscMPIInt    size;
1171 
1172   PetscFunctionBegin;
1173   /* Easy test: symmetric diagonal block */
1174   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1175   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1176   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1177   if (!*f) PetscFunctionReturn(0);
1178   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size == 1) PetscFunctionReturn(0);
1181 
1182   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1183   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1184   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1185   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1186   for (i=0; i<first; i++) notme[i] = i;
1187   for (i=last; i<M; i++) notme[i-last+first] = i;
1188   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1189   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1190   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1191   Aoff = Aoffs[0];
1192   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1193   Boff = Boffs[0];
1194   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1195   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1197   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1199   ierr = PetscFree(notme);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261 #if defined(PETSC_USE_LOG)
1262   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1263 #endif
1264   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1266   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1268 #if defined(PETSC_USE_CTABLE)
1269   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1270 #else
1271   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1272 #endif
1273   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1274   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1275   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1276   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1277   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1278   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1279   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1280 
1281   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1282   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1283 
1284   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_SCALAPACK)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1315   const PetscInt    *garray = aij->garray;
1316   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1317   PetscInt          *rowlens;
1318   PetscInt          *colidxs;
1319   PetscScalar       *matvals;
1320   PetscErrorCode    ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1324 
1325   M  = mat->rmap->N;
1326   N  = mat->cmap->N;
1327   m  = mat->rmap->n;
1328   rs = mat->rmap->rstart;
1329   cs = mat->cmap->rstart;
1330   nz = A->nz + B->nz;
1331 
1332   /* write matrix header */
1333   header[0] = MAT_FILE_CLASSID;
1334   header[1] = M; header[2] = N; header[3] = nz;
1335   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1337 
1338   /* fill in and store row lengths  */
1339   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1340   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1341   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1342   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1343 
1344   /* fill in and store column indices */
1345   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1346   for (cnt=0, i=0; i<m; i++) {
1347     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1348       if (garray[B->j[jb]] > cs) break;
1349       colidxs[cnt++] = garray[B->j[jb]];
1350     }
1351     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1352       colidxs[cnt++] = A->j[ja] + cs;
1353     for (; jb<B->i[i+1]; jb++)
1354       colidxs[cnt++] = garray[B->j[jb]];
1355   }
1356   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1357   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1358   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1359 
1360   /* fill in and store nonzero values */
1361   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1362   for (cnt=0, i=0; i<m; i++) {
1363     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1364       if (garray[B->j[jb]] > cs) break;
1365       matvals[cnt++] = B->a[jb];
1366     }
1367     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1368       matvals[cnt++] = A->a[ja];
1369     for (; jb<B->i[i+1]; jb++)
1370       matvals[cnt++] = B->a[jb];
1371   }
1372   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1373   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1374   ierr = PetscFree(matvals);CHKERRQ(ierr);
1375 
1376   /* write block size option to the viewer's .info file */
1377   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
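/*
  Usage sketch (illustrative only; the file name is a placeholder): the binary writer above
  is reached through MatView() with a binary viewer, e.g.

     PetscViewer vb;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&vb);CHKERRQ(ierr);
     ierr = MatView(mat,vb);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&vb);CHKERRQ(ierr);
*/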
1380 
1381 #include <petscdraw.h>
1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1383 {
1384   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1385   PetscErrorCode    ierr;
1386   PetscMPIInt       rank = aij->rank,size = aij->size;
1387   PetscBool         isdraw,iascii,isbinary;
1388   PetscViewer       sviewer;
1389   PetscViewerFormat format;
1390 
1391   PetscFunctionBegin;
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1393   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1394   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1395   if (iascii) {
1396     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1397     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1398       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1399       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1400       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1401       for (i=0; i<(PetscInt)size; i++) {
1402         nmax = PetscMax(nmax,nz[i]);
1403         nmin = PetscMin(nmin,nz[i]);
1404         navg += nz[i];
1405       }
1406       ierr = PetscFree(nz);CHKERRQ(ierr);
1407       navg = navg/size;
1408       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1409       PetscFunctionReturn(0);
1410     }
1411     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1412     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1413       MatInfo   info;
1414       PetscBool inodes;
1415 
1416       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1417       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1418       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1420       if (!inodes) {
1421         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1422                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1423       } else {
1424         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1425                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1426       }
1427       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1429       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1434       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1435       PetscFunctionReturn(0);
1436     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1437       PetscInt inodecount,inodelimit,*inodes;
1438       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1439       if (inodes) {
1440         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1441       } else {
1442         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1443       }
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1446       PetscFunctionReturn(0);
1447     }
1448   } else if (isbinary) {
1449     if (size == 1) {
1450       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1451       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1454     }
1455     PetscFunctionReturn(0);
1456   } else if (iascii && size == 1) {
1457     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1458     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1459     PetscFunctionReturn(0);
1460   } else if (isdraw) {
1461     PetscDraw draw;
1462     PetscBool isnull;
1463     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1464     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1465     if (isnull) PetscFunctionReturn(0);
1466   }
1467 
1468   { /* assemble the entire matrix onto first processor */
1469     Mat A = NULL, Av;
1470     IS  isrow,iscol;
1471 
1472     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1473     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1474     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1475     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1476 /* The commented-out code below uses MatCreateSubMatrices() instead */
1477 /*
1478     Mat *AA, A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1484     if (!rank) {
1485        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1486        A    = AA[0];
1487        Av   = AA[0];
1488     }
1489     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1490 */
1491     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1492     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1493     /*
1494        Every process has to participate in drawing the matrix since the graphics waits are
1495        synchronized across all processes that share the PetscDraw object
1496     */
1497     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1498     if (!rank) {
1499       if (((PetscObject)mat)->name) {
1500         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1501       }
1502       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&A);CHKERRQ(ierr);
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1512 {
1513   PetscErrorCode ierr;
1514   PetscBool      iascii,isdraw,issocket,isbinary;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1521   if (iascii || isdraw || isbinary || issocket) {
1522     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
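/*
  Usage sketch (illustrative only): the ASCII formats handled above can be selected from the
  options database, e.g.

     -mat_view ::ascii_info_detail

  or programmatically with

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/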
1526 
1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1528 {
1529   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1530   PetscErrorCode ierr;
1531   Vec            bb1 = 0;
1532   PetscBool      hasop;
1533 
1534   PetscFunctionBegin;
1535   if (flag == SOR_APPLY_UPPER) {
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537     PetscFunctionReturn(0);
1538   }
1539 
1540   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* bb1 is needed unless a single iteration with a zero initial guess (and no Eisenstat) is requested */
1541     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1542   }
1543 
1544   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1545     if (flag & SOR_ZERO_INITIAL_GUESS) {
1546       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1547       its--;
1548     }
1549 
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1578     if (flag & SOR_ZERO_INITIAL_GUESS) {
1579       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1580       its--;
1581     }
1582     while (its--) {
1583       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585 
1586       /* update rhs: bb1 = bb - B*x */
1587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1589 
1590       /* local sweep */
1591       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1592     }
1593   } else if (flag & SOR_EISENSTAT) {
1594     Vec xx1;
1595 
1596     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1598 
1599     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     if (!mat->diag) {
1602       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1604     }
1605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1606     if (hasop) {
1607       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1608     } else {
1609       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1610     }
1611     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1612 
1613     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1614 
1615     /* local sweep */
1616     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1617     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1618     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1619   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1620 
1621   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1622 
1623   matin->factorerrortype = mat->A->factorerrortype;
1624   PetscFunctionReturn(0);
1625 }
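/*
  Note (added for exposition): only the "local" SOR variants are provided above; each outer
  iteration refreshes the ghost values of xx, forms bb1 = bb - B*x_ghost, and then sweeps the
  local diagonal block only. A typical way to reach this routine is through PCSOR, e.g.

     -ksp_type richardson -pc_type sor -pc_sor_local_symmetric

  A fully coupled parallel SOR is not supported, as the error branch above states.
*/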
1626 
1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1628 {
1629   Mat            aA,aB,Aperm;
1630   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1631   PetscScalar    *aa,*ba;
1632   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1633   PetscSF        rowsf,sf;
1634   IS             parcolp = NULL;
1635   PetscBool      done;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1640   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1643 
1644   /* Invert row permutation to find out where my rows should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1649   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1651 
1652   /* Invert column permutation to find out where my columns should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1656   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1657   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1660 
1661   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1664 
1665   /* Find out where my gcols should go */
1666   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1667   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     PetscInt    row = rdest[i];
1680     PetscMPIInt rowner;
1681     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1682     for (j=ai[i]; j<ai[i+1]; j++) {
1683       PetscInt    col = cdest[aj[j]];
1684       PetscMPIInt cowner;
1685       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1686       if (rowner == cowner) dnnz[i]++;
1687       else onnz[i]++;
1688     }
1689     for (j=bi[i]; j<bi[i+1]; j++) {
1690       PetscInt    col = gcdest[bj[j]];
1691       PetscMPIInt cowner;
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the scratch arrays, so insert the values in batches of at most m */
1711       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
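/*
  Usage sketch (illustrative only; names are placeholders): the routine above is the MPIAIJ
  implementation of

     Mat Bperm;
     ierr = MatPermute(A,rowperm,colperm,&Bperm);CHKERRQ(ierr);

  where rowperm and colperm are IS objects giving the new global row and column ordering,
  distributed conformally with the matrix rows and columns.
*/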
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscLogDouble isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
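/*
  Usage sketch (illustrative only): global statistics are obtained by reducing the per-process
  numbers gathered above, e.g.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
*/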
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818   case MAT_SORTED_FULL:
1819     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1820     break;
1821   case MAT_IGNORE_OFF_PROC_ENTRIES:
1822     a->donotstash = flg;
1823     break;
1824   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1825   case MAT_SPD:
1826   case MAT_SYMMETRIC:
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828   case MAT_HERMITIAN:
1829   case MAT_SYMMETRY_ETERNAL:
1830     break;
1831   case MAT_SUBMAT_SINGLEIS:
1832     A->submat_singleis = flg;
1833     break;
1834   case MAT_STRUCTURE_ONLY:
1835     /* The option is handled directly by MatSetOption() */
1836     break;
1837   default:
1838     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1844 {
1845   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1846   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1847   PetscErrorCode ierr;
1848   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1849   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1850   PetscInt       *cmap,*idx_p;
1851 
1852   PetscFunctionBegin;
1853   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1854   mat->getrowactive = PETSC_TRUE;
1855 
1856   if (!mat->rowvalues && (idx || v)) {
1857     /*
1858         allocate enough space to hold information from the longest row.
1859     */
1860     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1861     PetscInt   max = 1,tmp;
1862     for (i=0; i<matin->rmap->n; i++) {
1863       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1864       if (max < tmp) max = tmp;
1865     }
1866     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1867   }
1868 
1869   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1870   lrow = row - rstart;
1871 
1872   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1873   if (!v)   {pvA = 0; pvB = 0;}
1874   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1875   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1876   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1877   nztot = nzA + nzB;
1878 
1879   cmap = mat->garray;
1880   if (v  || idx) {
1881     if (nztot) {
1882       /* Sort by increasing column numbers, assuming A and B already sorted */
1883       PetscInt imark = -1;
1884       if (v) {
1885         *v = v_p = mat->rowvalues;
1886         for (i=0; i<nzB; i++) {
1887           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1888           else break;
1889         }
1890         imark = i;
1891         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1892         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1893       }
1894       if (idx) {
1895         *idx = idx_p = mat->rowindices;
1896         if (imark > -1) {
1897           for (i=0; i<imark; i++) {
1898             idx_p[i] = cmap[cworkB[i]];
1899           }
1900         } else {
1901           for (i=0; i<nzB; i++) {
1902             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1903             else break;
1904           }
1905           imark = i;
1906         }
1907         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1908         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1909       }
1910     } else {
1911       if (idx) *idx = 0;
1912       if (v)   *v   = 0;
1913     }
1914   }
1915   *nz  = nztot;
1916   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1917   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1922 {
1923   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1924 
1925   PetscFunctionBegin;
1926   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1927   aij->getrowactive = PETSC_FALSE;
1928   PetscFunctionReturn(0);
1929 }
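/*
  Usage sketch (illustrative only; the row must be locally owned, as checked in MatGetRow_MPIAIJ()):

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ...
     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/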
1930 
1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1932 {
1933   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1934   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1935   PetscErrorCode ierr;
1936   PetscInt       i,j,cstart = mat->cmap->rstart;
1937   PetscReal      sum = 0.0;
1938   MatScalar      *v;
1939 
1940   PetscFunctionBegin;
1941   if (aij->size == 1) {
1942     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1943   } else {
1944     if (type == NORM_FROBENIUS) {
1945       v = amat->a;
1946       for (i=0; i<amat->nz; i++) {
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948       }
1949       v = bmat->a;
1950       for (i=0; i<bmat->nz; i++) {
1951         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1952       }
1953       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1954       *norm = PetscSqrtReal(*norm);
1955       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1960       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1961       *norm = 0.0;
1962       v     = amat->a; jj = amat->j;
1963       for (j=0; j<amat->nz; j++) {
1964         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1965       }
1966       v = bmat->a; jj = bmat->j;
1967       for (j=0; j<bmat->nz; j++) {
1968         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1969       }
1970       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1971       for (j=0; j<mat->cmap->N; j++) {
1972         if (tmp2[j] > *norm) *norm = tmp2[j];
1973       }
1974       ierr = PetscFree(tmp);CHKERRQ(ierr);
1975       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1976       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v   = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1992       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1993     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1994   }
1995   PetscFunctionReturn(0);
1996 }
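/*
  Usage sketch (illustrative only): the norm is selected with NormType, e.g.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);

  NORM_1 and NORM_INFINITY are also supported above; NORM_2 is not, as the error branch shows.
*/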
1997 
1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1999 {
2000   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2001   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2002   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2003   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2004   PetscErrorCode  ierr;
2005   Mat             B,A_diag,*B_diag;
2006   const MatScalar *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2060      very quickly (i.e. without using MatSetValues()), because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
2064   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
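/*
  Usage sketch (illustrative only):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

  or in place, which takes the *matout == A path above and ends with MatHeaderMerge():

     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
*/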
2086 
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2106   }
2107   /* scale the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* Because of the column compression in the off-process part of the matrix a->B,
2157        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2158        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2159        could be provided by first uncompressing the a->B matrices and then copying the
2160        submatrices */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
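/*
  Worked example (added for exposition): if row i of X has global columns {1,4,7} and row i
  of Y has global columns {2,4,9}, the merge above counts 1,2,4,7,9 exactly once, giving
  nnz[i] = 5; the shared column 4 is counted once because of the "Skip duplicate" branch.
*/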
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin/End(), so the matrix on the GPU
2237        will be updated */
2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2239     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2240       Y->offloadmask = PETSC_OFFLOAD_CPU;
2241     }
2242 #endif
2243   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2244     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2245   } else {
2246     Mat      B;
2247     PetscInt *nnz_d,*nnz_o;
2248     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2249     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2250     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2251     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2252     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2253     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2254     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2255     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2256     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2257     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2258     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2259     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2260     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2261     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2262   }
2263   PetscFunctionReturn(0);
2264 }
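/*
  Usage sketch (illustrative only): Y <- a*X + Y, with the cheapest path taken when the caller
  can guarantee matching nonzero patterns, e.g.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

  With DIFFERENT_NONZERO_PATTERN the routine above preallocates a new matrix and swaps it into
  Y via MatHeaderReplace().
*/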
2265 
2266 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2267 
2268 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2269 {
2270 #if defined(PETSC_USE_COMPLEX)
2271   PetscErrorCode ierr;
2272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2273 
2274   PetscFunctionBegin;
2275   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2276   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2277 #else
2278   PetscFunctionBegin;
2279 #endif
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2284 {
2285   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2286   PetscErrorCode ierr;
2287 
2288   PetscFunctionBegin;
2289   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2290   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2295 {
2296   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2297   PetscErrorCode ierr;
2298 
2299   PetscFunctionBegin;
2300   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2301   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2306 {
2307   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2308   PetscErrorCode ierr;
2309   PetscInt       i,*idxb = 0;
2310   PetscScalar    *va,*vb;
2311   Vec            vtmp;
2312 
2313   PetscFunctionBegin;
2314   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2315   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2316   if (idx) {
2317     for (i=0; i<A->rmap->n; i++) {
2318       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2319     }
2320   }
2321 
2322   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2323   if (idx) {
2324     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2325   }
2326   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2327   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2328 
2329   for (i=0; i<A->rmap->n; i++) {
2330     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2331       va[i] = vb[i];
2332       if (idx) idx[i] = a->garray[idxb[i]];
2333     }
2334   }
2335 
2336   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2337   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2338   ierr = PetscFree(idxb);CHKERRQ(ierr);
2339   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2344 {
2345   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2346   PetscErrorCode ierr;
2347   PetscInt       i,*idxb = 0;
2348   PetscScalar    *va,*vb;
2349   Vec            vtmp;
2350 
2351   PetscFunctionBegin;
2352   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2353   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2354   if (idx) {
2355     for (i=0; i<A->rmap->n; i++) { /* va and idx have row-layout length, so loop over the local rows */
2356       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2357     }
2358   }
2359 
2360   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2361   if (idx) {
2362     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2363   }
2364   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2365   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2366 
2367   for (i=0; i<A->rmap->n; i++) {
2368     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2369       va[i] = vb[i];
2370       if (idx) idx[i] = a->garray[idxb[i]];
2371     }
2372   }
2373 
2374   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2375   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2376   ierr = PetscFree(idxb);CHKERRQ(ierr);
2377   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2382 {
2383   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2384   PetscInt       n      = A->rmap->n;
2385   PetscInt       cstart = A->cmap->rstart;
2386   PetscInt       *cmap  = mat->garray;
2387   PetscInt       *diagIdx, *offdiagIdx;
2388   Vec            diagV, offdiagV;
2389   PetscScalar    *a, *diagA, *offdiagA;
2390   PetscInt       r;
2391   PetscErrorCode ierr;
2392 
2393   PetscFunctionBegin;
2394   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2395   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2396   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2397   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2398   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2399   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2400   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2401   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2402   for (r = 0; r < n; ++r) {
2403     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2404       a[r]   = diagA[r];
2405       idx[r] = cstart + diagIdx[r];
2406     } else {
2407       a[r]   = offdiagA[r];
2408       idx[r] = cmap[offdiagIdx[r]];
2409     }
2410   }
2411   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2412   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2413   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2414   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2415   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2416   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2421 {
2422   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2423   PetscInt       n      = A->rmap->n;
2424   PetscInt       cstart = A->cmap->rstart;
2425   PetscInt       *cmap  = mat->garray;
2426   PetscInt       *diagIdx, *offdiagIdx;
2427   Vec            diagV, offdiagV;
2428   PetscScalar    *a, *diagA, *offdiagA;
2429   PetscInt       r;
2430   PetscErrorCode ierr;
2431 
2432   PetscFunctionBegin;
2433   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2434   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2435   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2436   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2437   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2438   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2439   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2440   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2441   for (r = 0; r < n; ++r) {
2442     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2443       a[r]   = diagA[r];
2444       idx[r] = cstart + diagIdx[r];
2445     } else {
2446       a[r]   = offdiagA[r];
2447       idx[r] = cmap[offdiagIdx[r]];
2448     }
2449   }
2450   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2451   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2452   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2453   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2454   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2455   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2460 {
2461   PetscErrorCode ierr;
2462   Mat            *dummy;
2463 
2464   PetscFunctionBegin;
2465   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2466   *newmat = *dummy;
2467   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2472 {
2473   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2474   PetscErrorCode ierr;
2475 
2476   PetscFunctionBegin;
2477   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2478   A->factorerrortype = a->A->factorerrortype;
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2483 {
2484   PetscErrorCode ierr;
2485   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2486 
2487   PetscFunctionBegin;
2488   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2489   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2490   if (x->assembled) {
2491     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2492   } else {
2493     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2494   }
2495   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2496   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2501 {
2502   PetscFunctionBegin;
2503   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2504   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 /*@
2509    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2510 
2511    Collective on Mat
2512 
2513    Input Parameters:
2514 +    A - the matrix
2515 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2516 
2517    Level: advanced
2518 
2519 @*/
2520 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2521 {
2522   PetscErrorCode       ierr;
2523 
2524   PetscFunctionBegin;
2525   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2526   PetscFunctionReturn(0);
2527 }
2528 
2529 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2530 {
2531   PetscErrorCode       ierr;
2532   PetscBool            sc = PETSC_FALSE,flg;
2533 
2534   PetscFunctionBegin;
2535   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2536   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2537   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2538   if (flg) {
2539     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2540   }
2541   ierr = PetscOptionsTail();CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2546 {
2547   PetscErrorCode ierr;
2548   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2549   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2550 
2551   PetscFunctionBegin;
2552   if (!Y->preallocated) {
2553     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2554   } else if (!aij->nz) {
2555     PetscInt nonew = aij->nonew;
2556     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2557     aij->nonew = nonew;
2558   }
2559   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2564 {
2565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2566   PetscErrorCode ierr;
2567 
2568   PetscFunctionBegin;
2569   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2570   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2571   if (d) {
2572     PetscInt rstart;
2573     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2574     *d += rstart;
2575 
2576   }
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2581 {
2582   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2583   PetscErrorCode ierr;
2584 
2585   PetscFunctionBegin;
2586   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 /* -------------------------------------------------------------------*/
2591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2592                                        MatGetRow_MPIAIJ,
2593                                        MatRestoreRow_MPIAIJ,
2594                                        MatMult_MPIAIJ,
2595                                 /* 4*/ MatMultAdd_MPIAIJ,
2596                                        MatMultTranspose_MPIAIJ,
2597                                        MatMultTransposeAdd_MPIAIJ,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                 /*10*/ 0,
2602                                        0,
2603                                        0,
2604                                        MatSOR_MPIAIJ,
2605                                        MatTranspose_MPIAIJ,
2606                                 /*15*/ MatGetInfo_MPIAIJ,
2607                                        MatEqual_MPIAIJ,
2608                                        MatGetDiagonal_MPIAIJ,
2609                                        MatDiagonalScale_MPIAIJ,
2610                                        MatNorm_MPIAIJ,
2611                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2612                                        MatAssemblyEnd_MPIAIJ,
2613                                        MatSetOption_MPIAIJ,
2614                                        MatZeroEntries_MPIAIJ,
2615                                 /*24*/ MatZeroRows_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                 /*29*/ MatSetUp_MPIAIJ,
2621                                        0,
2622                                        0,
2623                                        MatGetDiagonalBlock_MPIAIJ,
2624                                        0,
2625                                 /*34*/ MatDuplicate_MPIAIJ,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                 /*39*/ MatAXPY_MPIAIJ,
2631                                        MatCreateSubMatrices_MPIAIJ,
2632                                        MatIncreaseOverlap_MPIAIJ,
2633                                        MatGetValues_MPIAIJ,
2634                                        MatCopy_MPIAIJ,
2635                                 /*44*/ MatGetRowMax_MPIAIJ,
2636                                        MatScale_MPIAIJ,
2637                                        MatShift_MPIAIJ,
2638                                        MatDiagonalSet_MPIAIJ,
2639                                        MatZeroRowsColumns_MPIAIJ,
2640                                 /*49*/ MatSetRandom_MPIAIJ,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2646                                        0,
2647                                        MatSetUnfactored_MPIAIJ,
2648                                        MatPermute_MPIAIJ,
2649                                        0,
2650                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2651                                        MatDestroy_MPIAIJ,
2652                                        MatView_MPIAIJ,
2653                                        0,
2654                                        0,
2655                                 /*64*/ 0,
2656                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2661                                        MatGetRowMinAbs_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                 /*75*/ MatFDColoringApply_AIJ,
2667                                        MatSetFromOptions_MPIAIJ,
2668                                        0,
2669                                        0,
2670                                        MatFindZeroDiagonals_MPIAIJ,
2671                                 /*80*/ 0,
2672                                        0,
2673                                        0,
2674                                 /*83*/ MatLoad_MPIAIJ,
2675                                        MatIsSymmetric_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*89*/ 0,
2681                                        0,
2682                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        MatBindToCPU_MPIAIJ,
2690                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2691                                        0,
2692                                        0,
2693                                        MatConjugate_MPIAIJ,
2694                                        0,
2695                                 /*104*/MatSetValuesRow_MPIAIJ,
2696                                        MatRealPart_MPIAIJ,
2697                                        MatImaginaryPart_MPIAIJ,
2698                                        0,
2699                                        0,
2700                                 /*109*/0,
2701                                        0,
2702                                        MatGetRowMin_MPIAIJ,
2703                                        0,
2704                                        MatMissingDiagonal_MPIAIJ,
2705                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2706                                        0,
2707                                        MatGetGhosts_MPIAIJ,
2708                                        0,
2709                                        0,
2710                                 /*119*/0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        MatGetMultiProcBlock_MPIAIJ,
2715                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2716                                        MatGetColumnNorms_MPIAIJ,
2717                                        MatInvertBlockDiagonal_MPIAIJ,
2718                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2719                                        MatCreateSubMatricesMPI_MPIAIJ,
2720                                 /*129*/0,
2721                                        0,
2722                                        0,
2723                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2724                                        0,
2725                                 /*134*/0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                 /*139*/MatSetBlockSizes_MPIAIJ,
2731                                        0,
2732                                        0,
2733                                        MatFDColoringSetUp_MPIXAIJ,
2734                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2735                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2736                                 /*145*/0,
2737                                        0,
2738                                        0
2739 };
2740 
2741 /* ----------------------------------------------------------------------------------------*/
2742 
2743 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2744 {
2745   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2746   PetscErrorCode ierr;
2747 
2748   PetscFunctionBegin;
2749   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2750   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2755 {
2756   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2757   PetscErrorCode ierr;
2758 
2759   PetscFunctionBegin;
2760   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2761   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2766 {
2767   Mat_MPIAIJ     *b;
2768   PetscErrorCode ierr;
2769   PetscMPIInt    size;
2770 
2771   PetscFunctionBegin;
2772   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2773   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2774   b = (Mat_MPIAIJ*)B->data;
2775 
2776 #if defined(PETSC_USE_CTABLE)
2777   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2778 #else
2779   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2780 #endif
2781   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2782   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2783   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2784 
2785   /* Because B may have been resized we simply destroy it and create a new one each time */
2786   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2787   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2788   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2789   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2790   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2791   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2792   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2793 
2794   if (!B->preallocated) {
2795     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2796     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2797     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2798     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2799     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2800   }
2801 
2802   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2803   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2804   B->preallocated  = PETSC_TRUE;
2805   B->was_assembled = PETSC_FALSE;
2806   B->assembled     = PETSC_FALSE;
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2811 {
2812   Mat_MPIAIJ     *b;
2813   PetscErrorCode ierr;
2814 
2815   PetscFunctionBegin;
2816   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2817   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2818   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2819   b = (Mat_MPIAIJ*)B->data;
2820 
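  /* colmap, garray, lvec and Mvctx describe the current off-diagonal structure; destroy them here so they are rebuilt at the next assembly */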
2821 #if defined(PETSC_USE_CTABLE)
2822   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2823 #else
2824   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2825 #endif
2826   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2827   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2828   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2829 
2830   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2831   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2832   B->preallocated  = PETSC_TRUE;
2833   B->was_assembled = PETSC_FALSE;
2834   B->assembled = PETSC_FALSE;
2835   PetscFunctionReturn(0);
2836 }
2837 
2838 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2839 {
2840   Mat            mat;
2841   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2842   PetscErrorCode ierr;
2843 
2844   PetscFunctionBegin;
2845   *newmat = 0;
2846   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2847   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2848   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2849   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2850   a       = (Mat_MPIAIJ*)mat->data;
2851 
2852   mat->factortype   = matin->factortype;
2853   mat->assembled    = matin->assembled;
2854   mat->insertmode   = NOT_SET_VALUES;
2855   mat->preallocated = matin->preallocated;
2856 
2857   a->size         = oldmat->size;
2858   a->rank         = oldmat->rank;
2859   a->donotstash   = oldmat->donotstash;
2860   a->roworiented  = oldmat->roworiented;
2861   a->rowindices   = NULL;
2862   a->rowvalues    = NULL;
2863   a->getrowactive = PETSC_FALSE;
2864 
2865   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2866   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2867 
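  /* colmap translates global column indices into local column indices of the off-diagonal block B during MatSetValues() */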
2868   if (oldmat->colmap) {
2869 #if defined(PETSC_USE_CTABLE)
2870     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2871 #else
2872     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2873     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2874     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2875 #endif
2876   } else a->colmap = NULL;
2877   if (oldmat->garray) {
2878     PetscInt len;
2879     len  = oldmat->B->cmap->n;
2880     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2881     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2882     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2883   } else a->garray = NULL;
2884 
2885   /* MatDuplicate() may be called with a matrix that is not yet assembled;
2886      in fact, MatDuplicate() only requires the matrix to be preallocated.
2887      This can happen, for example, inside DMCreateMatrix_Shell() */
2888   if (oldmat->lvec) {
2889     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2890     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2891   }
2892   if (oldmat->Mvctx) {
2893     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2894     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2895   }
2896   if (oldmat->Mvctx_mpi1) {
2897     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2898     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2899   }
2900 
2901   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2902   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2903   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2904   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2905   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2906   *newmat = mat;
2907   PetscFunctionReturn(0);
2908 }
2909 
2910 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2911 {
2912   PetscBool      isbinary, ishdf5;
2913   PetscErrorCode ierr;
2914 
2915   PetscFunctionBegin;
2916   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2917   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2918   /* force binary viewer to load .info file if it has not yet done so */
2919   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2920   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2921   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2922   if (isbinary) {
2923     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2924   } else if (ishdf5) {
2925 #if defined(PETSC_HAVE_HDF5)
2926     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2927 #else
2928     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2929 #endif
2930   } else {
2931     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2932   }
2933   PetscFunctionReturn(0);
2934 }
2935 
2936 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2937 {
2938   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2939   PetscInt       *rowidxs,*colidxs;
2940   PetscScalar    *matvals;
2941   PetscErrorCode ierr;
2942 
2943   PetscFunctionBegin;
2944   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2945 
2946   /* read in matrix header */
2947   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2948   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2949   M  = header[1]; N = header[2]; nz = header[3];
2950   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2951   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2952   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2953 
2954   /* set block sizes from the viewer's .info file */
2955   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2956   /* set global sizes if not set already */
2957   if (mat->rmap->N < 0) mat->rmap->N = M;
2958   if (mat->cmap->N < 0) mat->cmap->N = N;
2959   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2960   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2961 
2962   /* check if the matrix sizes are correct */
2963   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2964   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2965 
2966   /* read in row lengths and build row indices */
2967   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2968   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2969   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
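  /* the file stores per-row nonzero counts; the prefix sum below converts them into CSR row offsets */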
2970   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2971   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2972   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
2973   /* read in column indices and matrix values */
2974   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2975   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2976   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2977   /* store matrix indices and values */
2978   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2979   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2980   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 /* Not scalable because of ISAllGather() unless getting all columns. */
2985 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2986 {
2987   PetscErrorCode ierr;
2988   IS             iscol_local;
2989   PetscBool      isstride;
2990   PetscMPIInt    lisstride=0,gisstride;
2991 
2992   PetscFunctionBegin;
2993   /* check if we are grabbing all columns */
2994   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2995 
2996   if (isstride) {
2997     PetscInt  start,len,mstart,mlen;
2998     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2999     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3000     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3001     if (mstart == start && mlen-mstart == len) lisstride = 1;
3002   }
3003 
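  /* the all-columns shortcut is taken only if every process owns exactly its local column range (hence MPI_MIN over the local flags) */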
3004   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3005   if (gisstride) {
3006     PetscInt N;
3007     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3008     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3009     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3010     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3011   } else {
3012     PetscInt cbs;
3013     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3014     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3015     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3016   }
3017 
3018   *isseq = iscol_local;
3019   PetscFunctionReturn(0);
3020 }
3021 
3022 /*
3023  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3024  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3025 
3026  Input Parameters:
3027    mat - matrix
3028    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3029            i.e., mat->rstart <= isrow[i] < mat->rend
3030    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3031            i.e., mat->cstart <= iscol[i] < mat->cend
3032  Output Parameter:
3033    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3034    iscol_o - sequential column index set for retrieving mat->B
3035    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3036  */
3037 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3038 {
3039   PetscErrorCode ierr;
3040   Vec            x,cmap;
3041   const PetscInt *is_idx;
3042   PetscScalar    *xarray,*cmaparray;
3043   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3044   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3045   Mat            B=a->B;
3046   Vec            lvec=a->lvec,lcmap;
3047   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3048   MPI_Comm       comm;
3049   VecScatter     Mvctx=a->Mvctx;
3050 
3051   PetscFunctionBegin;
3052   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3053   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3054 
3055   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3056   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3057   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3058   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3059   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3060 
3061   /* Get start indices */
3062   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3063   isstart -= ncols;
3064   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3065 
3066   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3067   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3068   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3069   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3070   for (i=0; i<ncols; i++) {
3071     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3072     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3073     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3074   }
3075   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3076   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3077   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3078 
3079   /* Get iscol_d */
3080   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3081   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3082   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3083 
3084   /* Get isrow_d */
3085   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3086   rstart = mat->rmap->rstart;
3087   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3088   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3089   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3090   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3091 
3092   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3093   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3094   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3095 
3096   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3097   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3098   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3099 
3100   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3101 
3102   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3103   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3104 
3105   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3106   /* off-process column indices */
3107   count = 0;
3108   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3109   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3110 
3111   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3112   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3113   for (i=0; i<Bn; i++) {
3114     if (PetscRealPart(xarray[i]) > -1.0) {
3115       idx[count]     = i;                   /* local column index in off-diagonal part B */
3116       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3117       count++;
3118     }
3119   }
3120   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3121   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3122 
3123   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3124   /* cannot ensure iscol_o has same blocksize as iscol! */
3125 
3126   ierr = PetscFree(idx);CHKERRQ(ierr);
3127   *garray = cmap1;
3128 
3129   ierr = VecDestroy(&x);CHKERRQ(ierr);
3130   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3131   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3132   PetscFunctionReturn(0);
3133 }
3134 
3135 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3136 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3137 {
3138   PetscErrorCode ierr;
3139   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3140   Mat            M = NULL;
3141   MPI_Comm       comm;
3142   IS             iscol_d,isrow_d,iscol_o;
3143   Mat            Asub = NULL,Bsub = NULL;
3144   PetscInt       n;
3145 
3146   PetscFunctionBegin;
3147   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3148 
3149   if (call == MAT_REUSE_MATRIX) {
3150     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3151     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3152     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3153 
3154     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3155     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3156 
3157     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3158     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3159 
3160     /* Update diagonal and off-diagonal portions of submat */
3161     asub = (Mat_MPIAIJ*)(*submat)->data;
3162     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3163     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3164     if (n) {
3165       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3166     }
3167     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3168     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3169 
3170   } else { /* call == MAT_INITIAL_MATRIX */
3171     const PetscInt *garray;
3172     PetscInt        BsubN;
3173 
3174     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3175     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3176 
3177     /* Create local submatrices Asub and Bsub */
3178     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3179     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3180 
3181     /* Create submatrix M */
3182     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3183 
3184     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3185     asub = (Mat_MPIAIJ*)M->data;
3186 
3187     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3188     n = asub->B->cmap->N;
3189     if (BsubN > n) {
3190       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3191       const PetscInt *idx;
3192       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3193       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3194 
3195       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3196       j = 0;
3197       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3198       for (i=0; i<n; i++) {
3199         if (j >= BsubN) break;
3200         while (subgarray[i] > garray[j]) j++;
3201 
3202         if (subgarray[i] == garray[j]) {
3203           idx_new[i] = idx[j++];
3204         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3205       }
3206       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3207 
3208       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3209       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3210 
3211     } else if (BsubN < n) {
3212       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3213     }
3214 
3215     ierr = PetscFree(garray);CHKERRQ(ierr);
3216     *submat = M;
3217 
3218     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3219     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3220     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3221 
3222     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3223     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3224 
3225     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3226     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3227   }
3228   PetscFunctionReturn(0);
3229 }
3230 
3231 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3232 {
3233   PetscErrorCode ierr;
3234   IS             iscol_local=NULL,isrow_d;
3235   PetscInt       csize;
3236   PetscInt       n,i,j,start,end;
3237   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3238   MPI_Comm       comm;
3239 
3240   PetscFunctionBegin;
3241   /* If isrow has same processor distribution as mat,
3242      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3243   if (call == MAT_REUSE_MATRIX) {
3244     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3245     if (isrow_d) {
3246       sameRowDist  = PETSC_TRUE;
3247       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3248     } else {
3249       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3250       if (iscol_local) {
3251         sameRowDist  = PETSC_TRUE;
3252         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3253       }
3254     }
3255   } else {
3256     /* Check if isrow has same processor distribution as mat */
3257     sameDist[0] = PETSC_FALSE;
3258     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3259     if (!n) {
3260       sameDist[0] = PETSC_TRUE;
3261     } else {
3262       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3263       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3264       if (i >= start && j < end) {
3265         sameDist[0] = PETSC_TRUE;
3266       }
3267     }
3268 
3269     /* Check if iscol has same processor distribution as mat */
3270     sameDist[1] = PETSC_FALSE;
3271     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3272     if (!n) {
3273       sameDist[1] = PETSC_TRUE;
3274     } else {
3275       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3276       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3277       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3278     }
3279 
3280     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3281     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3282     sameRowDist = tsameDist[0];
3283   }
3284 
3285   if (sameRowDist) {
3286     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3287       /* isrow and iscol have same processor distribution as mat */
3288       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3289       PetscFunctionReturn(0);
3290     } else { /* sameRowDist */
3291       /* isrow has same processor distribution as mat */
3292       if (call == MAT_INITIAL_MATRIX) {
3293         PetscBool sorted;
3294         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3295         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3296         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3297         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3298 
3299         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3300         if (sorted) {
3301           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3302           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3303           PetscFunctionReturn(0);
3304         }
3305       } else { /* call == MAT_REUSE_MATRIX */
3306         IS    iscol_sub;
3307         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3308         if (iscol_sub) {
3309           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3310           PetscFunctionReturn(0);
3311         }
3312       }
3313     }
3314   }
3315 
3316   /* General case: iscol -> iscol_local which has global size of iscol */
3317   if (call == MAT_REUSE_MATRIX) {
3318     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3319     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3320   } else {
3321     if (!iscol_local) {
3322       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3323     }
3324   }
3325 
3326   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3327   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3328 
3329   if (call == MAT_INITIAL_MATRIX) {
3330     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3331     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3332   }
3333   PetscFunctionReturn(0);
3334 }
3335 
3336 /*@C
3337      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3338          and "off-diagonal" part of the matrix in CSR format.
3339 
3340    Collective
3341 
3342    Input Parameters:
3343 +  comm - MPI communicator
3344 .  A - "diagonal" portion of matrix
3345 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3346 -  garray - global index of B columns
3347 
3348    Output Parameter:
3349 .   mat - the matrix, with input A as its local diagonal matrix

3350    Level: advanced
3351 
3352    Notes:
3353        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3354        A becomes part of the output mat and B is destroyed by this routine; the caller must not use A or B afterwards.
3355 
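       Example usage (a minimal sketch, error checking abbreviated; A, B, garray and C are illustrative placeholders the caller
       is assumed to have built consistently, e.g. with MatCreateSeqAIJ()/MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()):
$      /* A: local "diagonal" block; B: local "off-diagonal" block whose columns 0..len(garray)-1 map to global columns garray[] */
$      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);CHKERRQ(ierr);
$      /* A and B now belong to C; the caller must not use or destroy them */
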
3356 .seealso: MatCreateMPIAIJWithSplitArrays()
3357 @*/
3358 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3359 {
3360   PetscErrorCode ierr;
3361   Mat_MPIAIJ     *maij;
3362   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3363   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3364   PetscScalar    *oa=b->a;
3365   Mat            Bnew;
3366   PetscInt       m,n,N;
3367 
3368   PetscFunctionBegin;
3369   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3370   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3371   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3372   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3373   /* the check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3374   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3375 
3376   /* Get global columns of mat */
3377   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3378 
3379   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3380   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3381   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3382   maij = (Mat_MPIAIJ*)(*mat)->data;
3383 
3384   (*mat)->preallocated = PETSC_TRUE;
3385 
3386   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3387   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3388 
3389   /* Set A as diagonal portion of *mat */
3390   maij->A = A;
3391 
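  /* translate B's compact local column indices into global column indices using garray */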
3392   nz = oi[m];
3393   for (i=0; i<nz; i++) {
3394     col   = oj[i];
3395     oj[i] = garray[col];
3396   }
3397 
3398   /* Set Bnew as off-diagonal portion of *mat */
3399   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3400   bnew        = (Mat_SeqAIJ*)Bnew->data;
3401   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3402   maij->B     = Bnew;
3403 
3404   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3405 
3406   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3407   b->free_a       = PETSC_FALSE;
3408   b->free_ij      = PETSC_FALSE;
3409   ierr = MatDestroy(&B);CHKERRQ(ierr);
3410 
3411   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3412   bnew->free_a       = PETSC_TRUE;
3413   bnew->free_ij      = PETSC_TRUE;
3414 
3415   /* condense columns of maij->B */
3416   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3417   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3418   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3419   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3420   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3425 
3426 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3427 {
3428   PetscErrorCode ierr;
3429   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3430   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3431   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3432   Mat            M,Msub,B=a->B;
3433   MatScalar      *aa;
3434   Mat_SeqAIJ     *aij;
3435   PetscInt       *garray = a->garray,*colsub,Ncols;
3436   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3437   IS             iscol_sub,iscmap;
3438   const PetscInt *is_idx,*cmap;
3439   PetscBool      allcolumns=PETSC_FALSE;
3440   MPI_Comm       comm;
3441 
3442   PetscFunctionBegin;
3443   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3444 
3445   if (call == MAT_REUSE_MATRIX) {
3446     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3447     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3448     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3449 
3450     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3451     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3452 
3453     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3454     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3455 
3456     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3457 
3458   } else { /* call == MAT_INITIAL_MATRIX */
3459     PetscBool flg;
3460 
3461     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3462     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3463 
3464     /* (1) iscol -> nonscalable iscol_local */
3465     /* Check for special case: each processor gets entire matrix columns */
3466     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3467     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3468     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3469     if (allcolumns) {
3470       iscol_sub = iscol_local;
3471       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3472       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3473 
3474     } else {
3475       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
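      /* both iscol_local and garray are sorted in increasing order, so a single forward sweep with counter k matches the off-process columns */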
3476       PetscInt *idx,*cmap1,k;
3477       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3478       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3479       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3480       count = 0;
3481       k     = 0;
3482       for (i=0; i<Ncols; i++) {
3483         j = is_idx[i];
3484         if (j >= cstart && j < cend) {
3485           /* diagonal part of mat */
3486           idx[count]     = j;
3487           cmap1[count++] = i; /* column index in submat */
3488         } else if (Bn) {
3489           /* off-diagonal part of mat */
3490           if (j == garray[k]) {
3491             idx[count]     = j;
3492             cmap1[count++] = i;  /* column index in submat */
3493           } else if (j > garray[k]) {
3494             while (j > garray[k] && k < Bn-1) k++;
3495             if (j == garray[k]) {
3496               idx[count]     = j;
3497               cmap1[count++] = i; /* column index in submat */
3498             }
3499           }
3500         }
3501       }
3502       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3503 
3504       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3505       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3506       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3507 
3508       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3509     }
3510 
3511     /* (3) Create sequential Msub */
3512     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3513   }
3514 
3515   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3516   aij  = (Mat_SeqAIJ*)(Msub)->data;
3517   ii   = aij->i;
3518   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3519 
3520   /*
3521       m - number of local rows
3522       Ncols - number of columns (same on all processors)
3523       rstart - first row in new global matrix generated
3524   */
3525   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3526 
3527   if (call == MAT_INITIAL_MATRIX) {
3528     /* (4) Create parallel newmat */
3529     PetscMPIInt    rank,size;
3530     PetscInt       csize;
3531 
3532     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3533     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3534 
3535     /*
3536         Determine the number of non-zeros in the diagonal and off-diagonal
3537         portions of the matrix in order to do correct preallocation
3538     */
3539 
3540     /* first get start and end of "diagonal" columns */
3541     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3542     if (csize == PETSC_DECIDE) {
3543       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3544       if (mglobal == Ncols) { /* square matrix */
3545         nlocal = m;
3546       } else {
3547         nlocal = Ncols/size + ((Ncols % size) > rank);
3548       }
3549     } else {
3550       nlocal = csize;
3551     }
3552     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3553     rstart = rend - nlocal;
3554     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3555 
3556     /* next, compute all the lengths */
3557     jj    = aij->j;
3558     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3559     olens = dlens + m;
3560     for (i=0; i<m; i++) {
3561       jend = ii[i+1] - ii[i];
3562       olen = 0;
3563       dlen = 0;
3564       for (j=0; j<jend; j++) {
3565         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3566         else dlen++;
3567         jj++;
3568       }
3569       olens[i] = olen;
3570       dlens[i] = dlen;
3571     }
3572 
3573     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3574     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3575 
3576     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3577     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3578     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3579     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3580     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3581     ierr = PetscFree(dlens);CHKERRQ(ierr);
3582 
3583   } else { /* call == MAT_REUSE_MATRIX */
3584     M    = *newmat;
3585     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3586     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3587     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3588     /*
3589          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3590        rather than the slower MatSetValues().
3591     */
3592     M->was_assembled = PETSC_TRUE;
3593     M->assembled     = PETSC_FALSE;
3594   }
3595 
3596   /* (5) Set values of Msub to *newmat */
3597   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3598   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3599 
3600   jj   = aij->j;
3601   aa   = aij->a;
3602   for (i=0; i<m; i++) {
3603     row = rstart + i;
3604     nz  = ii[i+1] - ii[i];
3605     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3606     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3607     jj += nz; aa += nz;
3608   }
3609   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3610 
3611   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3612   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3613 
3614   ierr = PetscFree(colsub);CHKERRQ(ierr);
3615 
3616   /* save Msub, iscol_sub and iscmap used in processor for next request */
3617   if (call ==  MAT_INITIAL_MATRIX) {
3618     *newmat = M;
3619     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3620     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3621 
3622     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3623     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3624 
3625     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3626     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3627 
3628     if (iscol_local) {
3629       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3630       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3631     }
3632   }
3633   PetscFunctionReturn(0);
3634 }
3635 
3636 /*
3637     Not great since it makes two copies of the submatrix: first a SeqAIJ
3638   on each process, and then the final result by concatenating the local matrices.
3639   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3640 
3641   Note: This requires a sequential iscol with all indices.
3642 */
3643 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3644 {
3645   PetscErrorCode ierr;
3646   PetscMPIInt    rank,size;
3647   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3648   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3649   Mat            M,Mreuse;
3650   MatScalar      *aa,*vwork;
3651   MPI_Comm       comm;
3652   Mat_SeqAIJ     *aij;
3653   PetscBool      colflag,allcolumns=PETSC_FALSE;
3654 
3655   PetscFunctionBegin;
3656   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3657   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3658   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3659 
3660   /* Check for special case: each processor gets entire matrix columns */
3661   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3662   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3663   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3664   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3665 
3666   if (call ==  MAT_REUSE_MATRIX) {
3667     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3668     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3669     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3670   } else {
3671     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3672   }
3673 
3674   /*
3675       m - number of local rows
3676       n - number of columns (same on all processors)
3677       rstart - first row in new global matrix generated
3678   */
3679   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3680   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3681   if (call == MAT_INITIAL_MATRIX) {
3682     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3683     ii  = aij->i;
3684     jj  = aij->j;
3685 
3686     /*
3687         Determine the number of non-zeros in the diagonal and off-diagonal
3688         portions of the matrix in order to do correct preallocation
3689     */
3690 
3691     /* first get start and end of "diagonal" columns */
3692     if (csize == PETSC_DECIDE) {
3693       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3694       if (mglobal == n) { /* square matrix */
3695         nlocal = m;
3696       } else {
3697         nlocal = n/size + ((n % size) > rank);
3698       }
3699     } else {
3700       nlocal = csize;
3701     }
3702     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3703     rstart = rend - nlocal;
3704     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3705 
3706     /* next, compute all the lengths */
3707     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3708     olens = dlens + m;
3709     for (i=0; i<m; i++) {
3710       jend = ii[i+1] - ii[i];
3711       olen = 0;
3712       dlen = 0;
3713       for (j=0; j<jend; j++) {
3714         if (*jj < rstart || *jj >= rend) olen++;
3715         else dlen++;
3716         jj++;
3717       }
3718       olens[i] = olen;
3719       dlens[i] = dlen;
3720     }
3721     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3722     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3723     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3724     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3725     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3726     ierr = PetscFree(dlens);CHKERRQ(ierr);
3727   } else {
3728     PetscInt ml,nl;
3729 
3730     M    = *newmat;
3731     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3732     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3733     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3734     /*
3735          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3736        rather than the slower MatSetValues().
3737     */
3738     M->was_assembled = PETSC_TRUE;
3739     M->assembled     = PETSC_FALSE;
3740   }
3741   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3742   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3743   ii   = aij->i;
3744   jj   = aij->j;
3745   aa   = aij->a;
3746   for (i=0; i<m; i++) {
3747     row   = rstart + i;
3748     nz    = ii[i+1] - ii[i];
3749     cwork = jj;     jj += nz;
3750     vwork = aa;     aa += nz;
3751     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3752   }
3753 
3754   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3756   *newmat = M;
3757 
3758   /* save submatrix used in processor for next request */
3759   if (call ==  MAT_INITIAL_MATRIX) {
3760     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3761     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3762   }
3763   PetscFunctionReturn(0);
3764 }
3765 
3766 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3767 {
3768   PetscInt       m,cstart, cend,j,nnz,i,d;
3769   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3770   const PetscInt *JJ;
3771   PetscErrorCode ierr;
3772   PetscBool      nooffprocentries;
3773 
3774   PetscFunctionBegin;
3775   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3776 
3777   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3778   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3779   m      = B->rmap->n;
3780   cstart = B->cmap->rstart;
3781   cend   = B->cmap->rend;
3782   rstart = B->rmap->rstart;
3783 
3784   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3785 
3786   if (PetscDefined(USE_DEBUG)) {
3787     for (i=0; i<m; i++) {
3788       nnz = Ii[i+1]- Ii[i];
3789       JJ  = J + Ii[i];
3790       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3791       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3792       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3793     }
3794   }
3795 
3796   for (i=0; i<m; i++) {
3797     nnz     = Ii[i+1]- Ii[i];
3798     JJ      = J + Ii[i];
3799     nnz_max = PetscMax(nnz_max,nnz);
3800     d       = 0;
3801     for (j=0; j<nnz; j++) {
3802       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3803     }
3804     d_nnz[i] = d;
3805     o_nnz[i] = nnz - d;
3806   }
3807   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3808   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3809 
3810   for (i=0; i<m; i++) {
3811     ii   = i + rstart;
3812     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3813   }
3814   nooffprocentries    = B->nooffprocentries;
3815   B->nooffprocentries = PETSC_TRUE;
3816   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3817   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3818   B->nooffprocentries = nooffprocentries;
3819 
3820   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3821   PetscFunctionReturn(0);
3822 }
3823 
3824 /*@
3825    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3826    (the default parallel PETSc format).
3827 
3828    Collective
3829 
3830    Input Parameters:
3831 +  B - the matrix
3832 .  i - the indices into j for the start of each local row (starts with zero)
3833 .  j - the column indices for each local row (starts with zero)
3834 -  v - optional values in the matrix
3835 
3836    Level: developer
3837 
3838    Notes:
3839        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3840      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3841      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3842 
3843        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3844 
3845        The format used for the sparse matrix input is equivalent to a
3846     row-major ordering, i.e. for the following matrix, the input data expected is
3847     as shown below
3848 
3849 $        1 0 0
3850 $        2 0 3     P0
3851 $       -------
3852 $        4 5 6     P1
3853 $
3854 $     Process0 [P0]: rows_owned=[0,1]
3855 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3856 $        j =  {0,0,2}  [size = 3]
3857 $        v =  {1,2,3}  [size = 3]
3858 $
3859 $     Process1 [P1]: rows_owned=[2]
3860 $        i =  {0,3}    [size = nrow+1  = 1+1]
3861 $        j =  {0,1,2}  [size = 3]
3862 $        v =  {4,5,6}  [size = 3]
3863 
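   A minimal call sequence for the example above, written for process P0 (a sketch; each process passes its own local arrays, and the sizes and values shown are illustrative):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
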
3864 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3865           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3866 @*/
3867 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3868 {
3869   PetscErrorCode ierr;
3870 
3871   PetscFunctionBegin;
3872   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3873   PetscFunctionReturn(0);
3874 }
3875 
3876 /*@C
3877    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3878    (the default parallel PETSc format).  For good matrix assembly performance
3879    the user should preallocate the matrix storage by setting the parameters
3880    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3881    performance can be increased by more than a factor of 50.
3882 
3883    Collective
3884 
3885    Input Parameters:
3886 +  B - the matrix
3887 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3888            (same value is used for all local rows)
3889 .  d_nnz - array containing the number of nonzeros in the various rows of the
3890            DIAGONAL portion of the local submatrix (possibly different for each row)
3891            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3892            The size of this array is equal to the number of local rows, i.e 'm'.
3893            For matrices that will be factored, you must leave room for (and set)
3894            the diagonal entry even if it is zero.
3895 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3896            submatrix (same value is used for all local rows).
3897 -  o_nnz - array containing the number of nonzeros in the various rows of the
3898            OFF-DIAGONAL portion of the local submatrix (possibly different for
3899            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3900            structure. The size of this array is equal to the number
3901            of local rows, i.e 'm'.
3902 
3903    If the *_nnz parameter is given then the *_nz parameter is ignored
3904 
3905    The AIJ format (also called the Yale sparse matrix format or
3906    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3907    storage.  The stored row and column indices begin with zero.
3908    See Users-Manual: ch_mat for details.
3909 
3910    The parallel matrix is partitioned such that the first m0 rows belong to
3911    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3912    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3913 
3914    The DIAGONAL portion of the local submatrix of a processor can be defined
3915    as the submatrix obtained by extracting the part corresponding to
3916    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3917    first row that belongs to the processor, r2 is the last row belonging to
3918    this processor, and c1-c2 is the range of indices of the local part of a
3919    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3920    common case of a square matrix, the row and column ranges are the same and
3921    the DIAGONAL part is also square. The remaining portion of the local
3922    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3923 
3924    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3925 
3926    You can call MatGetInfo() to get information on how effective the preallocation was;
3927    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3928    You can also run with the option -info and look for messages with the string
3929    malloc in them to see if additional memory allocation was needed.
3930 
3931    Example usage:
3932 
3933    Consider the following 8x8 matrix with 34 non-zero values, that is
3934    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3935    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3936    as follows:
3937 
3938 .vb
3939             1  2  0  |  0  3  0  |  0  4
3940     Proc0   0  5  6  |  7  0  0  |  8  0
3941             9  0 10  | 11  0  0  | 12  0
3942     -------------------------------------
3943            13  0 14  | 15 16 17  |  0  0
3944     Proc1   0 18  0  | 19 20 21  |  0  0
3945             0  0  0  | 22 23  0  | 24  0
3946     -------------------------------------
3947     Proc2  25 26 27  |  0  0 28  | 29  0
3948            30  0  0  | 31 32 33  |  0 34
3949 .ve
3950 
3951    This can be represented as a collection of submatrices as:
3952 
3953 .vb
3954       A B C
3955       D E F
3956       G H I
3957 .ve
3958 
3959    Where the submatrices A,B,C are owned by proc0, D,E,F are
3960    owned by proc1, G,H,I are owned by proc2.
3961 
3962    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3963    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3964    The 'M','N' parameters are 8,8, and have the same values on all procs.
3965 
3966    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3967    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3968    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3969    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3970    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3971    matrix, and [DF] as another SeqAIJ matrix.
3972 
3973    When d_nz, o_nz parameters are specified, d_nz storage elements are
3974    allocated for every row of the local diagonal submatrix, and o_nz
3975    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3976    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3977    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3978    In this case, the values of d_nz,o_nz are:
3979 .vb
3980      proc0 : dnz = 2, o_nz = 2
3981      proc1 : dnz = 3, o_nz = 2
3982      proc2 : dnz = 1, o_nz = 4
3983 .ve
3984    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3985    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3986    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3987    34 values.
3988 
3989    When d_nnz, o_nnz parameters are specified, the storage is specified
3990    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3991    In the above case the values for d_nnz,o_nnz are:
3992 .vb
3993      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3994      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3995      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3996 .ve
3997    Here the space allocated is sum of all the above values i.e 34, and
3998    hence pre-allocation is perfect.
3999 
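   As a sketch (illustrative values; each process passes its own arrays), the corresponding calls on proc0 would be

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve

   followed by the usual MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() calls.
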
4000    Level: intermediate
4001 
4002 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4003           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4004 @*/
4005 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4006 {
4007   PetscErrorCode ierr;
4008 
4009   PetscFunctionBegin;
4010   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4011   PetscValidType(B,1);
4012   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4013   PetscFunctionReturn(0);
4014 }
4015 
4016 /*@
4017      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4018          rows in standard CSR format.
4019 
4020    Collective
4021 
4022    Input Parameters:
4023 +  comm - MPI communicator
4024 .  m - number of local rows (Cannot be PETSC_DECIDE)
4025 .  n - This value should be the same as the local size used in creating the
4026        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4027        calculated if N is given) For square matrices n is almost always m.
4028 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4029 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4030 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4031 .   j - column indices
4032 -   a - matrix values
4033 
4034    Output Parameter:
4035 .   mat - the matrix
4036 
4037    Level: intermediate
4038 
4039    Notes:
4040        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4041      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4042      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4043 
4044        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4045 
4046        The format used for the sparse matrix input is equivalent to a
4047     row-major ordering, i.e. for the following matrix, the input data expected is
4048     as shown below
4049 
4050        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4051 
4052 $        1 0 0
4053 $        2 0 3     P0
4054 $       -------
4055 $        4 5 6     P1
4056 $
4057 $     Process0 [P0]: rows_owned=[0,1]
4058 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4059 $        j =  {0,0,2}  [size = 3]
4060 $        v =  {1,2,3}  [size = 3]
4061 $
4062 $     Process1 [P1]: rows_owned=[2]
4063 $        i =  {0,3}    [size = nrow+1  = 1+1]
4064 $        j =  {0,1,2}  [size = 3]
4065 $        v =  {4,5,6}  [size = 3]
4066 
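   A minimal call for the example above on process P0 (a sketch; each process passes its own local arrays):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
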
4067 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4068           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4069 @*/
4070 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4071 {
4072   PetscErrorCode ierr;
4073 
4074   PetscFunctionBegin;
4075   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4076   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4077   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4078   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4079   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4080   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4081   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4082   PetscFunctionReturn(0);
4083 }
4084 
4085 /*@
4086      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4087          rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created
4088 
4089    Collective
4090 
4091    Input Parameters:
4092 +  mat - the matrix
4093 .  m - number of local rows (Cannot be PETSC_DECIDE)
4094 .  n - This value should be the same as the local size used in creating the
4095        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4096        calculated if N is given) For square matrices n is almost always m.
4097 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4098 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4099 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4100 .  J - column indices
4101 -  v - matrix values
4102 
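   A sketch of the intended call sequence, where anew holds new values on the identical i and j structure (names are illustrative):

.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,i,j,a,&A);
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,i,j,anew);
.ve
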
4103    Level: intermediate
4104 
4105 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4106           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4107 @*/
4108 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4109 {
4110   PetscErrorCode ierr;
4111   PetscInt       cstart,nnz,i,j;
4112   PetscInt       *ld;
4113   PetscBool      nooffprocentries;
4114   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4115   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4116   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4117   const PetscInt *Adi = Ad->i;
4118   PetscInt       ldi,Iii,md;
4119 
4120   PetscFunctionBegin;
4121   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4122   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4123   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4124   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4125 
4126   cstart = mat->cmap->rstart;
4127   if (!Aij->ld) {
4128     /* count number of entries below block diagonal */
4129     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4130     Aij->ld = ld;
4131     for (i=0; i<m; i++) {
4132       nnz  = Ii[i+1]- Ii[i];
4133       j     = 0;
4134       while  (j < nnz && J[j] < cstart) {j++;}
4135       J    += nnz;
4136       ld[i] = j;
4137     }
4138   } else {
4139     ld = Aij->ld;
4140   }
4141 
4142   for (i=0; i<m; i++) {
4143     nnz  = Ii[i+1]- Ii[i];
4144     Iii  = Ii[i];
4145     ldi  = ld[i];
4146     md   = Adi[i+1]-Adi[i];
4147     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4148     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4149     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4150     ad  += md;
4151     ao  += nnz - md;
4152   }
4153   nooffprocentries      = mat->nooffprocentries;
4154   mat->nooffprocentries = PETSC_TRUE;
4155   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4156   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4157   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4158   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4159   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4160   mat->nooffprocentries = nooffprocentries;
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 /*@C
4165    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4166    (the default parallel PETSc format).  For good matrix assembly performance
4167    the user should preallocate the matrix storage by setting the parameters
4168    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4169    performance can be increased by more than a factor of 50.
4170 
4171    Collective
4172 
4173    Input Parameters:
4174 +  comm - MPI communicator
4175 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4176            This value should be the same as the local size used in creating the
4177            y vector for the matrix-vector product y = Ax.
4178 .  n - This value should be the same as the local size used in creating the
4179        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4180        calculated if N is given) For square matrices n is almost always m.
4181 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4182 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4183 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4184            (same value is used for all local rows)
4185 .  d_nnz - array containing the number of nonzeros in the various rows of the
4186            DIAGONAL portion of the local submatrix (possibly different for each row)
4187            or NULL, if d_nz is used to specify the nonzero structure.
4188            The size of this array is equal to the number of local rows, i.e 'm'.
4189 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4190            submatrix (same value is used for all local rows).
4191 -  o_nnz - array containing the number of nonzeros in the various rows of the
4192            OFF-DIAGONAL portion of the local submatrix (possibly different for
4193            each row) or NULL, if o_nz is used to specify the nonzero
4194            structure. The size of this array is equal to the number
4195            of local rows, i.e 'm'.
4196 
4197    Output Parameter:
4198 .  A - the matrix
4199 
4200    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4201    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4202    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4203 
4204    Notes:
4205    If the *_nnz parameter is given then the *_nz parameter is ignored
4206 
4207    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4208    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4209    storage requirements for this matrix.
4210 
4211    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4212    processor then it must be used on all processors that share the object for
4213    that argument.
4214 
4215    The user MUST specify either the local or global matrix dimensions
4216    (possibly both).
4217 
4218    The parallel matrix is partitioned across processors such that the
4219    first m0 rows belong to process 0, the next m1 rows belong to
4220    process 1, the next m2 rows belong to process 2 etc.. where
4221    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4222    values corresponding to [m x N] submatrix.
4223 
4224    The columns are logically partitioned with the n0 columns belonging
4225    to 0th partition, the next n1 columns belonging to the next
4226    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4227 
4228    The DIAGONAL portion of the local submatrix on any given processor
4229    is the submatrix formed by the rows and columns m,n
4230    owned by the given processor, i.e. the diagonal matrix on
4231    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4232    etc. The remaining portion of the local submatrix [m x (N-n)]
4233    constitute the OFF-DIAGONAL portion. The example below better
4234    illustrates this concept.
4235 
4236    For a square global matrix we define each processor's diagonal portion
4237    to be its local rows and the corresponding columns (a square submatrix);
4238    each processor's off-diagonal portion encompasses the remainder of the
4239    local matrix (a rectangular submatrix).
4240 
4241    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4242 
4243    When calling this routine with a single process communicator, a matrix of
4244    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4245    type of communicator, use the construction mechanism
4250 $     MatCreate(...,&A);
4251 $     MatSetType(A,MATMPIAIJ);
4252 $     MatSetSizes(A, m,n,M,N);
4253 $     MatMPIAIJSetPreallocation(A,...);
4254 
4255    By default, this format uses inodes (identical nodes) when possible.
4256    We search for consecutive rows with the same nonzero structure, thereby
4257    reusing matrix information to achieve increased efficiency.
4258 
4259    Options Database Keys:
4260 +  -mat_no_inode  - Do not use inodes
4261 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4262 
4263 
4264 
4265    Example usage:
4266 
4267    Consider the following 8x8 matrix with 34 non-zero values, that is
4268    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4269    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4270    as follows
4271 
4272 .vb
4273             1  2  0  |  0  3  0  |  0  4
4274     Proc0   0  5  6  |  7  0  0  |  8  0
4275             9  0 10  | 11  0  0  | 12  0
4276     -------------------------------------
4277            13  0 14  | 15 16 17  |  0  0
4278     Proc1   0 18  0  | 19 20 21  |  0  0
4279             0  0  0  | 22 23  0  | 24  0
4280     -------------------------------------
4281     Proc2  25 26 27  |  0  0 28  | 29  0
4282            30  0  0  | 31 32 33  |  0 34
4283 .ve
4284 
4285    This can be represented as a collection of submatrices as
4286 
4287 .vb
4288       A B C
4289       D E F
4290       G H I
4291 .ve
4292 
4293    Where the submatrices A,B,C are owned by proc0, D,E,F are
4294    owned by proc1, G,H,I are owned by proc2.
4295 
4296    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4297    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4298    The 'M','N' parameters are 8,8, and have the same values on all procs.
4299 
4300    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4301    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4302    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4303    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4304    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4305    matrix, and [DF] as another SeqAIJ matrix.
4306 
4307    When d_nz, o_nz parameters are specified, d_nz storage elements are
4308    allocated for every row of the local diagonal submatrix, and o_nz
4309    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4310    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4311    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4312    In this case, the values of d_nz,o_nz are
4313 .vb
4314      proc0 : dnz = 2, o_nz = 2
4315      proc1 : dnz = 3, o_nz = 2
4316      proc2 : dnz = 1, o_nz = 4
4317 .ve
4318    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4319    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4320    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4321    34 values.
4322 
4323    When d_nnz, o_nnz parameters are specified, the storage is specified
4324    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4325    In the above case the values for d_nnz,o_nnz are
4326 .vb
4327      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4328      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4329      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4330 .ve
4331    Here the space allocated is sum of all the above values i.e 34, and
4332    hence pre-allocation is perfect.
4333 
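   As a sketch (illustrative values), proc0 of the example above could therefore create its share of the matrix with

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     Mat      A;

     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve

   and then insert the nonzero values with MatSetValues() before assembling with MatAssemblyBegin()/MatAssemblyEnd().
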
4334    Level: intermediate
4335 
4336 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4337           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4338 @*/
4339 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4340 {
4341   PetscErrorCode ierr;
4342   PetscMPIInt    size;
4343 
4344   PetscFunctionBegin;
4345   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4346   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4347   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4348   if (size > 1) {
4349     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4350     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4351   } else {
4352     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4353     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4354   }
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 /*@C
4359   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4360 
4361   Not collective
4362 
4363   Input Parameter:
4364 . A - The MPIAIJ matrix
4365 
4366   Output Parameters:
4367 + Ad - The local diagonal block as a SeqAIJ matrix
4368 . Ao - The local off-diagonal block as a SeqAIJ matrix
4369 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4370 
4371   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4372   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4373   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4374   local column numbers to global column numbers in the original matrix.
4375 
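  For instance, the blocks can be inspected with the following sketch, in which local column c of Ao corresponds to global column colmap[c] of A (variable names are illustrative):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve
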
4376   Level: intermediate
4377 
4378 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4379 @*/
4380 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4381 {
4382   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4383   PetscBool      flg;
4384   PetscErrorCode ierr;
4385 
4386   PetscFunctionBegin;
4387   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4388   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4389   if (Ad)     *Ad     = a->A;
4390   if (Ao)     *Ao     = a->B;
4391   if (colmap) *colmap = a->garray;
4392   PetscFunctionReturn(0);
4393 }
4394 
4395 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4396 {
4397   PetscErrorCode ierr;
4398   PetscInt       m,N,i,rstart,nnz,Ii;
4399   PetscInt       *indx;
4400   PetscScalar    *values;
4401 
4402   PetscFunctionBegin;
4403   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4404   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4405     PetscInt       *dnz,*onz,sum,bs,cbs;
4406 
4407     if (n == PETSC_DECIDE) {
4408       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4409     }
4410     /* Check sum(n) = N */
4411     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4412     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4413 
4414     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4415     rstart -= m;
4416 
4417     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4418     for (i=0; i<m; i++) {
4419       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4420       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4421       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4422     }
4423 
4424     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4425     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4426     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4427     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4428     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4429     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4430     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4431     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4432   }
4433 
4434   /* numeric phase */
4435   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4436   for (i=0; i<m; i++) {
4437     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4438     Ii   = i + rstart;
4439     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4440     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4441   }
4442   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4443   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4444   PetscFunctionReturn(0);
4445 }
4446 
4447 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4448 {
4449   PetscErrorCode    ierr;
4450   PetscMPIInt       rank;
4451   PetscInt          m,N,i,rstart,nnz;
4452   size_t            len;
4453   const PetscInt    *indx;
4454   PetscViewer       out;
4455   char              *name;
4456   Mat               B;
4457   const PetscScalar *values;
4458 
4459   PetscFunctionBegin;
4460   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4461   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4462   /* Should this be the type of the diagonal block of A? */
4463   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4464   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4465   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4466   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4467   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4468   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4469   for (i=0; i<m; i++) {
4470     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4471     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4472     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4473   }
4474   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4475   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4476 
4477   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4478   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4479   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4480   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4481   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4482   ierr = PetscFree(name);CHKERRQ(ierr);
4483   ierr = MatView(B,out);CHKERRQ(ierr);
4484   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4485   ierr = MatDestroy(&B);CHKERRQ(ierr);
4486   PetscFunctionReturn(0);
4487 }
4488 
4489 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4490 {
4491   PetscErrorCode      ierr;
4492   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4493 
4494   PetscFunctionBegin;
4495   if (!merge) PetscFunctionReturn(0);
4496   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4497   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4498   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4499   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4500   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4501   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4502   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4503   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4504   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4505   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4506   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4507   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4508   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4509   ierr = PetscFree(merge);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #include <../src/mat/utils/freespace.h>
4514 #include <petscbt.h>
4515 
4516 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4517 {
4518   PetscErrorCode      ierr;
4519   MPI_Comm            comm;
4520   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4521   PetscMPIInt         size,rank,taga,*len_s;
4522   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4523   PetscInt            proc,m;
4524   PetscInt            **buf_ri,**buf_rj;
4525   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4526   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4527   MPI_Request         *s_waits,*r_waits;
4528   MPI_Status          *status;
4529   MatScalar           *aa=a->a;
4530   MatScalar           **abuf_r,*ba_i;
4531   Mat_Merge_SeqsToMPI *merge;
4532   PetscContainer      container;
4533 
4534   PetscFunctionBegin;
4535   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4536   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4537 
4538   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4539   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4540 
4541   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4542   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4543   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4544 
4545   bi     = merge->bi;
4546   bj     = merge->bj;
4547   buf_ri = merge->buf_ri;
4548   buf_rj = merge->buf_rj;
4549 
4550   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4551   owners = merge->rowmap->range;
4552   len_s  = merge->len_s;
4553 
4554   /* send and recv matrix values */
4555   /*-----------------------------*/
4556   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4557   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4558 
4559   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4560   for (proc=0,k=0; proc<size; proc++) {
4561     if (!len_s[proc]) continue;
4562     i    = owners[proc];
4563     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4564     k++;
4565   }
4566 
4567   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4568   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4569   ierr = PetscFree(status);CHKERRQ(ierr);
4570 
4571   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4572   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4573 
4574   /* insert mat values of mpimat */
4575   /*----------------------------*/
4576   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4577   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4578 
4579   for (k=0; k<merge->nrecv; k++) {
4580     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4581     nrows       = *(buf_ri_k[k]);
4582     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4583     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4584   }
4585 
4586   /* set values of ba */
4587   m = merge->rowmap->n;
4588   for (i=0; i<m; i++) {
4589     arow = owners[rank] + i;
4590     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4591     bnzi = bi[i+1] - bi[i];
4592     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4593 
4594     /* add local non-zero vals of this proc's seqmat into ba */
4595     anzi   = ai[arow+1] - ai[arow];
4596     aj     = a->j + ai[arow];
4597     aa     = a->a + ai[arow];
4598     nextaj = 0;
4599     for (j=0; nextaj<anzi; j++) {
4600       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4601         ba_i[j] += aa[nextaj++];
4602       }
4603     }
4604 
4605     /* add received vals into ba */
4606     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4607       /* i-th row */
4608       if (i == *nextrow[k]) {
4609         anzi   = *(nextai[k]+1) - *nextai[k];
4610         aj     = buf_rj[k] + *(nextai[k]);
4611         aa     = abuf_r[k] + *(nextai[k]);
4612         nextaj = 0;
4613         for (j=0; nextaj<anzi; j++) {
4614           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4615             ba_i[j] += aa[nextaj++];
4616           }
4617         }
4618         nextrow[k]++; nextai[k]++;
4619       }
4620     }
4621     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4622   }
4623   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4624   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4625 
4626   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4627   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4628   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4629   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4630   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4631   PetscFunctionReturn(0);
4632 }
4633 
4634 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4635 {
4636   PetscErrorCode      ierr;
4637   Mat                 B_mpi;
4638   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4639   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4640   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4641   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4642   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4643   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4644   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4645   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4646   MPI_Status          *status;
4647   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4648   PetscBT             lnkbt;
4649   Mat_Merge_SeqsToMPI *merge;
4650   PetscContainer      container;
4651 
4652   PetscFunctionBegin;
4653   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4654 
4655   /* make sure it is a PETSc comm */
4656   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4657   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4658   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4659 
4660   ierr = PetscNew(&merge);CHKERRQ(ierr);
4661   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4662 
4663   /* determine row ownership */
4664   /*---------------------------------------------------------*/
4665   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4666   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4667   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4669   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4670   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4671   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4672 
4673   m      = merge->rowmap->n;
4674   owners = merge->rowmap->range;
4675 
4676   /* determine the number of messages to send, their lengths */
4677   /*---------------------------------------------------------*/
4678   len_s = merge->len_s;
4679 
4680   len          = 0; /* length of buf_si[] */
4681   merge->nsend = 0;
4682   for (proc=0; proc<size; proc++) {
4683     len_si[proc] = 0;
4684     if (proc == rank) {
4685       len_s[proc] = 0;
4686     } else {
4687       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4688       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4689     }
4690     if (len_s[proc]) {
4691       merge->nsend++;
4692       nrows = 0;
4693       for (i=owners[proc]; i<owners[proc+1]; i++) {
4694         if (ai[i+1] > ai[i]) nrows++;
4695       }
4696       len_si[proc] = 2*(nrows+1);
4697       len         += len_si[proc];
4698     }
4699   }
4700 
4701   /* determine the number and length of messages to receive for ij-structure */
4702   /*-------------------------------------------------------------------------*/
4703   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4704   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4705 
4706   /* post the Irecv of j-structure */
4707   /*-------------------------------*/
4708   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4709   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4710 
4711   /* post the Isend of j-structure */
4712   /*--------------------------------*/
4713   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4714 
4715   for (proc=0, k=0; proc<size; proc++) {
4716     if (!len_s[proc]) continue;
4717     i    = owners[proc];
4718     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4719     k++;
4720   }
4721 
4722   /* receives and sends of j-structure are complete */
4723   /*------------------------------------------------*/
4724   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4725   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4726 
4727   /* send and recv i-structure */
4728   /*---------------------------*/
4729   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4730   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4731 
4732   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4733   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4734   for (proc=0,k=0; proc<size; proc++) {
4735     if (!len_s[proc]) continue;
4736     /* form outgoing message for i-structure:
4737          buf_si[0]:                 nrows to be sent
4738                [1:nrows]:           row index (global)
4739                [nrows+1:2*nrows+1]: i-structure index
4740     */
4741     /*-------------------------------------------*/
4742     nrows       = len_si[proc]/2 - 1;
4743     buf_si_i    = buf_si + nrows+1;
4744     buf_si[0]   = nrows;
4745     buf_si_i[0] = 0;
4746     nrows       = 0;
4747     for (i=owners[proc]; i<owners[proc+1]; i++) {
4748       anzi = ai[i+1] - ai[i];
4749       if (anzi) {
4750         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4751         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4752         nrows++;
4753       }
4754     }
4755     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4756     k++;
4757     buf_si += len_si[proc];
4758   }
4759 
4760   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4761   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4762 
4763   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4764   for (i=0; i<merge->nrecv; i++) {
4765     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4766   }
4767 
4768   ierr = PetscFree(len_si);CHKERRQ(ierr);
4769   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4770   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4771   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4772   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4773   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4774   ierr = PetscFree(status);CHKERRQ(ierr);
4775 
4776   /* compute a local seq matrix in each processor */
4777   /*----------------------------------------------*/
4778   /* allocate bi array and free space for accumulating nonzero column info */
4779   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4780   bi[0] = 0;
4781 
4782   /* create and initialize a linked list */
4783   nlnk = N+1;
4784   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4785 
4786   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4787   len  = ai[owners[rank+1]] - ai[owners[rank]];
4788   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4789 
4790   current_space = free_space;
4791 
4792   /* determine symbolic info for each local row */
4793   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4794 
4795   for (k=0; k<merge->nrecv; k++) {
4796     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4797     nrows       = *buf_ri_k[k];
4798     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4799     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4800   }
4801 
4802   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4803   len  = 0;
4804   for (i=0; i<m; i++) {
4805     bnzi = 0;
4806     /* add local non-zero cols of this proc's seqmat into lnk */
4807     arow  = owners[rank] + i;
4808     anzi  = ai[arow+1] - ai[arow];
4809     aj    = a->j + ai[arow];
4810     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4811     bnzi += nlnk;
4812     /* add received col data into lnk */
4813     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4814       if (i == *nextrow[k]) { /* i-th row */
4815         anzi  = *(nextai[k]+1) - *nextai[k];
4816         aj    = buf_rj[k] + *nextai[k];
4817         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4818         bnzi += nlnk;
4819         nextrow[k]++; nextai[k]++;
4820       }
4821     }
4822     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4823 
4824     /* if free space is not available, make more free space */
4825     if (current_space->local_remaining<bnzi) {
4826       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4827       nspacedouble++;
4828     }
4829     /* copy data into free space, then initialize lnk */
4830     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4831     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4832 
4833     current_space->array           += bnzi;
4834     current_space->local_used      += bnzi;
4835     current_space->local_remaining -= bnzi;
4836 
4837     bi[i+1] = bi[i] + bnzi;
4838   }
4839 
4840   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4841 
4842   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4843   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4844   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4845 
4846   /* create symbolic parallel matrix B_mpi */
4847   /*---------------------------------------*/
4848   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4849   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4850   if (n==PETSC_DECIDE) {
4851     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4852   } else {
4853     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4854   }
4855   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4856   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4857   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4858   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4859   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4860 
4861   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4862   B_mpi->assembled  = PETSC_FALSE;
4863   merge->bi         = bi;
4864   merge->bj         = bj;
4865   merge->buf_ri     = buf_ri;
4866   merge->buf_rj     = buf_rj;
4867   merge->coi        = NULL;
4868   merge->coj        = NULL;
4869   merge->owners_co  = NULL;
4870 
4871   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4872 
4873   /* attach the supporting struct to B_mpi for reuse */
4874   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4875   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4876   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4877   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4878   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4879   *mpimat = B_mpi;
4880 
4881   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4882   PetscFunctionReturn(0);
4883 }
4884 
4885 /*@C
4886       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4887                  matrices from each processor
4888 
4889     Collective
4890 
4891    Input Parameters:
4892 +    comm - the communicator the parallel matrix will live on
4893 .    seqmat - the input sequential matrix on each process
4894 .    m - number of local rows (or PETSC_DECIDE)
4895 .    n - number of local columns (or PETSC_DECIDE)
4896 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4897 
4898    Output Parameter:
4899 .    mpimat - the parallel matrix generated
4900 
4901     Level: advanced
4902 
4903    Notes:
4904      The dimensions of the sequential matrix in each processor MUST be the same.
4905      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4906      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
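
     A typical call sequence is sketched below: the matrix is built once, then rebuilt after the numerical values of seqmat change while its nonzero pattern stays fixed (each process passes its own seqmat; names are illustrative).

.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve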
4907 @*/
4908 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4909 {
4910   PetscErrorCode ierr;
4911   PetscMPIInt    size;
4912 
4913   PetscFunctionBegin;
4914   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4915   if (size == 1) {
4916     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4917     if (scall == MAT_INITIAL_MATRIX) {
4918       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4919     } else {
4920       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4921     }
4922     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4923     PetscFunctionReturn(0);
4924   }
4925   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4926   if (scall == MAT_INITIAL_MATRIX) {
4927     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4928   }
4929   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4930   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4931   PetscFunctionReturn(0);
4932 }
4933 
4934 /*@
4935      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4936           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4937           with MatGetSize().
4938 
4939     Not Collective
4940 
4941    Input Parameters:
4942 +    A - the matrix
4943 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4944 
4945    Output Parameter:
4946 .    A_loc - the local sequential matrix generated
4947 
4948     Level: developer
4949 
4950    Notes:
4951      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4952      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4953      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4954      modify the values of the returned A_loc.
4955 
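     A typical call pattern is sketched below: the local matrix is extracted once, refreshed after A changes numerically (same nonzero pattern), and destroyed by the caller when no longer needed (names are illustrative).

.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
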
4956 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4957 
4958 @*/
4959 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4960 {
4961   PetscErrorCode ierr;
4962   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4963   Mat_SeqAIJ     *mat,*a,*b;
4964   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4965   MatScalar      *aa,*ba,*cam;
4966   PetscScalar    *ca;
4967   PetscMPIInt    size;
4968   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4969   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4970   PetscBool      match;
4971 
4972   PetscFunctionBegin;
4973   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4974   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4975   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4976   if (size == 1) {
4977     if (scall == MAT_INITIAL_MATRIX) {
4978       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4979       *A_loc = mpimat->A;
4980     } else if (scall == MAT_REUSE_MATRIX) {
4981       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4982     }
4983     PetscFunctionReturn(0);
4984   }
4985 
4986   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4987   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4988   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4989   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4990   aa = a->a; ba = b->a;
4991   if (scall == MAT_INITIAL_MATRIX) {
4992     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4993     ci[0] = 0;
4994     for (i=0; i<am; i++) {
4995       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4996     }
4997     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4998     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4999     k    = 0;
5000     for (i=0; i<am; i++) {
5001       ncols_o = bi[i+1] - bi[i];
5002       ncols_d = ai[i+1] - ai[i];
5003       /* off-diagonal portion of A */
5004       for (jo=0; jo<ncols_o; jo++) {
5005         col = cmap[*bj];
5006         if (col >= cstart) break;
5007         cj[k]   = col; bj++;
5008         ca[k++] = *ba++;
5009       }
5010       /* diagonal portion of A */
5011       for (j=0; j<ncols_d; j++) {
5012         cj[k]   = cstart + *aj++;
5013         ca[k++] = *aa++;
5014       }
5015       /* off-diagonal portion of A */
5016       for (j=jo; j<ncols_o; j++) {
5017         cj[k]   = cmap[*bj++];
5018         ca[k++] = *ba++;
5019       }
5020     }
5021     /* put together the new matrix */
5022     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5023     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5024     /* Since these are PETSc arrays, change flags to free them as necessary. */
5025     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5026     mat->free_a  = PETSC_TRUE;
5027     mat->free_ij = PETSC_TRUE;
5028     mat->nonew   = 0;
5029   } else if (scall == MAT_REUSE_MATRIX) {
5030     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5031     ci = mat->i; cj = mat->j; cam = mat->a;
5032     for (i=0; i<am; i++) {
5033       /* off-diagonal portion of A */
5034       ncols_o = bi[i+1] - bi[i];
5035       for (jo=0; jo<ncols_o; jo++) {
5036         col = cmap[*bj];
5037         if (col >= cstart) break;
5038         *cam++ = *ba++; bj++;
5039       }
5040       /* diagonal portion of A */
5041       ncols_d = ai[i+1] - ai[i];
5042       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5043       /* off-diagonal portion of A */
5044       for (j=jo; j<ncols_o; j++) {
5045         *cam++ = *ba++; bj++;
5046       }
5047     }
5048   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5049   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5050   PetscFunctionReturn(0);
5051 }
5052 
5053 /*@C
5054      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5055 
5056     Not Collective
5057 
5058    Input Parameters:
5059 +    A - the matrix
5060 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5061 -    row, col - index sets of rows and columns to extract (or NULL)
5062 
5063    Output Parameter:
5064 .    A_loc - the local sequential matrix generated
5065 
5066     Level: developer
5067 
5068 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5069 
5070 @*/
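/*
   Hedged usage sketch (illustrative addition): the typical call with NULL row and column index sets,
   which condenses to all local rows and the nonzero columns; Aloc is a hypothetical variable name.

     Mat            Aloc;
     PetscErrorCode ierr;

     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     ...  use Aloc ...
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/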
5071 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5072 {
5073   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5074   PetscErrorCode ierr;
5075   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5076   IS             isrowa,iscola;
5077   Mat            *aloc;
5078   PetscBool      match;
5079 
5080   PetscFunctionBegin;
5081   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5082   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5083   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5084   if (!row) {
5085     start = A->rmap->rstart; end = A->rmap->rend;
5086     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5087   } else {
5088     isrowa = *row;
5089   }
5090   if (!col) {
5091     start = A->cmap->rstart;
5092     cmap  = a->garray;
5093     nzA   = a->A->cmap->n;
5094     nzB   = a->B->cmap->n;
5095     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5096     ncols = 0;
5097     for (i=0; i<nzB; i++) {
5098       if (cmap[i] < start) idx[ncols++] = cmap[i];
5099       else break;
5100     }
5101     imark = i;
5102     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5103     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5104     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5105   } else {
5106     iscola = *col;
5107   }
5108   if (scall != MAT_INITIAL_MATRIX) {
5109     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5110     aloc[0] = *A_loc;
5111   }
5112   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5113   if (!col) { /* attach global id of condensed columns */
5114     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5115   }
5116   *A_loc = aloc[0];
5117   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5118   if (!row) {
5119     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5120   }
5121   if (!col) {
5122     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5123   }
5124   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5125   PetscFunctionReturn(0);
5126 }
5127 
5128 /*
5129  * Create a sequential AIJ matrix based on row indices: a whole row is extracted once a row index is matched.
5130  * Rows can be local or remote. The routine is designed to be memory scalable, so that nothing is based
5131  * on a global size.
5132  * */
5133 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5134 {
5135   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5136   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5137   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5138   PetscMPIInt              owner;
5139   PetscSFNode              *iremote,*oiremote;
5140   const PetscInt           *lrowindices;
5141   PetscErrorCode           ierr;
5142   PetscSF                  sf,osf;
5143   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5144   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5145   MPI_Comm                 comm;
5146   ISLocalToGlobalMapping   mapping;
5147 
5148   PetscFunctionBegin;
5149   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5150   /* plocalsize is the number of roots
5151    * nrows is the number of leaves
5152    * */
5153   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5154   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5155   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5156   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5157   for (i=0;i<nrows;i++) {
5158     /* Find a remote index and an owner for a row
5159      * The row could be local or remote
5160      * */
5161     owner = 0;
5162     lidx  = 0;
5163     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5164     iremote[i].index = lidx;
5165     iremote[i].rank  = owner;
5166   }
5167   /* Create SF to communicate how many nonzero columns for each row */
5168   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5169   /* SF will figure out the number of nonzero columns for each row, and their
5170    * offsets
5171    * */
5172   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5173   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5174   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5175 
5176   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5177   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5178   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5179   roffsets[0] = 0;
5180   roffsets[1] = 0;
5181   for (i=0;i<plocalsize;i++) {
5182     /* diag */
5183     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5184     /* off diag */
5185     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5186     /* compute offsets so that we know the relative location of each row */
5187     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5188     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5189   }
5190   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5191   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5192   /* 'r' means root, and 'l' means leaf */
5193   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5194   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5195   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5196   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5197   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5198   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5199   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5200   dntotalcols = 0;
5201   ontotalcols = 0;
5202   ncol = 0;
5203   for (i=0;i<nrows;i++) {
5204     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5205     ncol = PetscMax(pnnz[i],ncol);
5206     /* diag */
5207     dntotalcols += nlcols[i*2+0];
5208     /* off diag */
5209     ontotalcols += nlcols[i*2+1];
5210   }
5211   /* We do not need to figure out the exact number of columns
5212    * since all the calculations will be done by going through the raw data
5213    * */
5214   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5215   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5216   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5217   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5218   /* diag */
5219   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5220   /* off diag */
5221   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5222   /* diag */
5223   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5224   /* off diag */
5225   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5226   dntotalcols = 0;
5227   ontotalcols = 0;
5228   ntotalcols  = 0;
5229   for (i=0;i<nrows;i++) {
5230     owner = 0;
5231     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5232     /* Set iremote for diag matrix */
5233     for (j=0;j<nlcols[i*2+0];j++) {
5234       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5235       iremote[dntotalcols].rank    = owner;
5236       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5237       ilocal[dntotalcols++]        = ntotalcols++;
5238     }
5239     /* off diag */
5240     for (j=0;j<nlcols[i*2+1];j++) {
5241       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5242       oiremote[ontotalcols].rank    = owner;
5243       oilocal[ontotalcols++]        = ntotalcols++;
5244     }
5245   }
5246   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5247   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5248   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5249   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5250   /* P serves as the roots and P_oth as the leaves
5251    * Diag matrix
5252    * */
5253   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5254   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5255   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5256 
5257   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5258   /* Off diag */
5259   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5260   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5261   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5262   /* We operate on the matrix's internal data to save memory */
5263   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5264   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5265   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5266   /* Convert to global indices for diag matrix */
5267   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5268   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5269   /* We want P_oth to store global indices */
5270   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5271   /* Use memory scalable approach */
5272   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5273   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5274   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5275   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5276   /* Convert back to local indices */
5277   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5278   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5279   nout = 0;
5280   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5281   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5282   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5283   /* Exchange values */
5284   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5285   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5286   /* Stop PETSc from shrinking memory */
5287   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5288   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5289   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5290   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5291   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5292   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5293   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5294   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5295   PetscFunctionReturn(0);
5296 }
5297 
5298 /*
5299  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5300  * This supports MPIAIJ and MAIJ
5301  * */
5302 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5303 {
5304   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5305   Mat_SeqAIJ            *p_oth;
5306   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5307   IS                    rows,map;
5308   PetscHMapI            hamp;
5309   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5310   MPI_Comm              comm;
5311   PetscSF               sf,osf;
5312   PetscBool             has;
5313   PetscErrorCode        ierr;
5314 
5315   PetscFunctionBegin;
5316   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5317   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5318   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5319    *  and then create a submatrix (that often is an overlapping matrix)
5320    * */
5321   if (reuse == MAT_INITIAL_MATRIX) {
5322     /* Use a hash table to figure out unique keys */
5323     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5324     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5325     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5326     count = 0;
5327     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5328     for (i=0;i<a->B->cmap->n;i++) {
5329       key  = a->garray[i]/dof;
5330       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5331       if (!has) {
5332         mapping[i] = count;
5333         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5334       } else {
5335         /* The current 'i' has the same key as the previous step */
5336         mapping[i] = count-1;
5337       }
5338     }
5339     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5340     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5341     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
5342     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5343     off = 0;
5344     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5345     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5346     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5347     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5348     /* In case the matrix was already created but the user wants to recreate it */
5349     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5350     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5351     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5352     ierr = ISDestroy(&map);CHKERRQ(ierr);
5353     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5354   } else if (reuse == MAT_REUSE_MATRIX) {
5355     /* If the matrix was already created, we simply update the values using the SF objects
5356      * that were attached to the matrix earlier.
5357      *  */
5358     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5359     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5360     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5361     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5362     /* Update values in place */
5363     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5364     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5365     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5366     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5367   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5368   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5369   PetscFunctionReturn(0);
5370 }
5371 
5372 /*@C
5373     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5374 
5375     Collective on Mat
5376 
5377    Input Parameters:
5378 +    A,B - the matrices in mpiaij format
5379 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5380 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5381 
5382    Output Parameter:
5383 +    rowb, colb - index sets of rows and columns of B to extract
5384 -    B_seq - the sequential matrix generated
5385 
5386     Level: developer
5387 
5388 @*/
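/*
   Hedged usage sketch (illustrative addition): first call with MAT_INITIAL_MATRIX to build the index
   sets and the sequential matrix, then reuse them when only the numerical values of B have changed;
   the variable names rowb, colb, and Bseq are hypothetical.

     IS             rowb = NULL,colb = NULL;
     Mat            Bseq = NULL;
     PetscErrorCode ierr;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ...  B gets new values with the same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/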
5389 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5390 {
5391   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5392   PetscErrorCode ierr;
5393   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5394   IS             isrowb,iscolb;
5395   Mat            *bseq=NULL;
5396 
5397   PetscFunctionBegin;
5398   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5399     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5400   }
5401   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5402 
5403   if (scall == MAT_INITIAL_MATRIX) {
5404     start = A->cmap->rstart;
5405     cmap  = a->garray;
5406     nzA   = a->A->cmap->n;
5407     nzB   = a->B->cmap->n;
5408     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5409     ncols = 0;
5410     for (i=0; i<nzB; i++) {  /* row < local row index */
5411       if (cmap[i] < start) idx[ncols++] = cmap[i];
5412       else break;
5413     }
5414     imark = i;
5415     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5416     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5417     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5418     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5419   } else {
5420     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5421     isrowb  = *rowb; iscolb = *colb;
5422     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5423     bseq[0] = *B_seq;
5424   }
5425   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5426   *B_seq = bseq[0];
5427   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5428   if (!rowb) {
5429     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5430   } else {
5431     *rowb = isrowb;
5432   }
5433   if (!colb) {
5434     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5435   } else {
5436     *colb = iscolb;
5437   }
5438   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5439   PetscFunctionReturn(0);
5440 }
5441 
5442 /*
5443     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5444     of the OFF-DIAGONAL portion of the local A
5445 
5446     Collective on Mat
5447 
5448    Input Parameters:
5449 +    A,B - the matrices in mpiaij format
5450 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5451 
5452    Output Parameters:
5453 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5454 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5455 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5456 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5457 
5458     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5459      for this matrix. This is not desirable.
5460 
5461     Level: developer
5462 
5463 */
5464 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5465 {
5466   PetscErrorCode         ierr;
5467   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5468   Mat_SeqAIJ             *b_oth;
5469   VecScatter             ctx;
5470   MPI_Comm               comm;
5471   const PetscMPIInt      *rprocs,*sprocs;
5472   const PetscInt         *srow,*rstarts,*sstarts;
5473   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5474   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5475   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5476   MPI_Request            *rwaits = NULL,*swaits = NULL;
5477   MPI_Status             rstatus;
5478   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5479 
5480   PetscFunctionBegin;
5481   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5482   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5483 
5484   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5485     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5486   }
5487   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5488   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5489 
5490   if (size == 1) {
5491     startsj_s = NULL;
5492     bufa_ptr  = NULL;
5493     *B_oth    = NULL;
5494     PetscFunctionReturn(0);
5495   }
5496 
5497   ctx = a->Mvctx;
5498   tag = ((PetscObject)ctx)->tag;
5499 
5500   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5501   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5502   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5503   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5504   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5505   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5506   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5507 
5508   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5509   if (scall == MAT_INITIAL_MATRIX) {
5510     /* i-array */
5511     /*---------*/
5512     /*  post receives */
5513     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5514     for (i=0; i<nrecvs; i++) {
5515       rowlen = rvalues + rstarts[i]*rbs;
5516       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5517       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5518     }
5519 
5520     /* pack the outgoing message */
5521     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5522 
5523     sstartsj[0] = 0;
5524     rstartsj[0] = 0;
5525     len         = 0; /* total length of j or a array to be sent */
5526     if (nsends) {
5527       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5528       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5529     }
5530     for (i=0; i<nsends; i++) {
5531       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5532       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5533       for (j=0; j<nrows; j++) {
5534         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5535         for (l=0; l<sbs; l++) {
5536           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5537 
5538           rowlen[j*sbs+l] = ncols;
5539 
5540           len += ncols;
5541           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5542         }
5543         k++;
5544       }
5545       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5546 
5547       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5548     }
5549     /* recvs and sends of i-array are completed */
5550     i = nrecvs;
5551     while (i--) {
5552       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5553     }
5554     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5555     ierr = PetscFree(svalues);CHKERRQ(ierr);
5556 
5557     /* allocate buffers for sending j and a arrays */
5558     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5559     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5560 
5561     /* create i-array of B_oth */
5562     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5563 
5564     b_othi[0] = 0;
5565     len       = 0; /* total length of j or a array to be received */
5566     k         = 0;
5567     for (i=0; i<nrecvs; i++) {
5568       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5569       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5570       for (j=0; j<nrows; j++) {
5571         b_othi[k+1] = b_othi[k] + rowlen[j];
5572         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5573         k++;
5574       }
5575       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5576     }
5577     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5578 
5579     /* allocate space for the j and a arrays of B_oth */
5580     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5581     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5582 
5583     /* j-array */
5584     /*---------*/
5585     /*  post receives of j-array */
5586     for (i=0; i<nrecvs; i++) {
5587       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5588       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5589     }
5590 
5591     /* pack the outgoing message j-array */
5592     if (nsends) k = sstarts[0];
5593     for (i=0; i<nsends; i++) {
5594       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5595       bufJ  = bufj+sstartsj[i];
5596       for (j=0; j<nrows; j++) {
5597         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5598         for (ll=0; ll<sbs; ll++) {
5599           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5600           for (l=0; l<ncols; l++) {
5601             *bufJ++ = cols[l];
5602           }
5603           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5604         }
5605       }
5606       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5607     }
5608 
5609     /* recvs and sends of j-array are completed */
5610     i = nrecvs;
5611     while (i--) {
5612       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5613     }
5614     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5615   } else if (scall == MAT_REUSE_MATRIX) {
5616     sstartsj = *startsj_s;
5617     rstartsj = *startsj_r;
5618     bufa     = *bufa_ptr;
5619     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5620     b_otha   = b_oth->a;
5621   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5622 
5623   /* a-array */
5624   /*---------*/
5625   /*  post receives of a-array */
5626   for (i=0; i<nrecvs; i++) {
5627     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5628     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5629   }
5630 
5631   /* pack the outgoing message a-array */
5632   if (nsends) k = sstarts[0];
5633   for (i=0; i<nsends; i++) {
5634     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5635     bufA  = bufa+sstartsj[i];
5636     for (j=0; j<nrows; j++) {
5637       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5638       for (ll=0; ll<sbs; ll++) {
5639         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5640         for (l=0; l<ncols; l++) {
5641           *bufA++ = vals[l];
5642         }
5643         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5644       }
5645     }
5646     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5647   }
5648   /* recvs and sends of a-array are completed */
5649   i = nrecvs;
5650   while (i--) {
5651     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5652   }
5653   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5654   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5655 
5656   if (scall == MAT_INITIAL_MATRIX) {
5657     /* put together the new matrix */
5658     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5659 
5660     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5661     /* Since these are PETSc arrays, change flags to free them as necessary. */
5662     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5663     b_oth->free_a  = PETSC_TRUE;
5664     b_oth->free_ij = PETSC_TRUE;
5665     b_oth->nonew   = 0;
5666 
5667     ierr = PetscFree(bufj);CHKERRQ(ierr);
5668     if (!startsj_s || !bufa_ptr) {
5669       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5670       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5671     } else {
5672       *startsj_s = sstartsj;
5673       *startsj_r = rstartsj;
5674       *bufa_ptr  = bufa;
5675     }
5676   }
5677 
5678   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5679   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5680   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5681   PetscFunctionReturn(0);
5682 }
5683 
5684 /*@C
5685   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5686 
5687   Not Collective
5688 
5689   Input Parameters:
5690 . A - The matrix in mpiaij format
5691 
5692   Output Parameters:
5693 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5694 . colmap - A map from global column index to local index into lvec
5695 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5696 
5697   Level: developer
5698 
5699 @*/
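/*
   Hedged usage sketch (illustrative addition): querying the communication structures of an assembled
   MATMPIAIJ matrix A. The colmap type depends on whether PETSc was configured with PETSC_USE_CTABLE.
   The returned objects point at internal data of A and should not be destroyed by the caller.

     Vec            lvec;
     VecScatter     scatter;
     PetscErrorCode ierr;
   #if defined(PETSC_USE_CTABLE)
     PetscTable     colmap;
   #else
     PetscInt       *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
     ...  inspect lvec, colmap, and scatter ...
*/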
5700 #if defined(PETSC_USE_CTABLE)
5701 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5702 #else
5703 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5704 #endif
5705 {
5706   Mat_MPIAIJ *a;
5707 
5708   PetscFunctionBegin;
5709   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5710   PetscValidPointer(lvec, 2);
5711   PetscValidPointer(colmap, 3);
5712   PetscValidPointer(multScatter, 4);
5713   a = (Mat_MPIAIJ*) A->data;
5714   if (lvec) *lvec = a->lvec;
5715   if (colmap) *colmap = a->colmap;
5716   if (multScatter) *multScatter = a->Mvctx;
5717   PetscFunctionReturn(0);
5718 }
5719 
5720 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5721 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5722 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5723 #if defined(PETSC_HAVE_MKL_SPARSE)
5724 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5725 #endif
5726 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5727 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5728 #if defined(PETSC_HAVE_ELEMENTAL)
5729 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5730 #endif
5731 #if defined(PETSC_HAVE_SCALAPACK)
5732 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5733 #endif
5734 #if defined(PETSC_HAVE_HYPRE)
5735 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5736 #endif
5737 #if defined(PETSC_HAVE_CUDA)
5738 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5739 #endif
5740 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5741 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5742 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5743 
5744 /*
5745     Computes (B'*A')' since computing A*B directly is untenable
5746 
5747                n                       p                          p
5748         (              )       (              )         (                  )
5749       m (      A       )  *  n (       B      )   =   m (         C        )
5750         (              )       (              )         (                  )
5751 
5752 */
5753 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5754 {
5755   PetscErrorCode ierr;
5756   Mat            At,Bt,Ct;
5757 
5758   PetscFunctionBegin;
5759   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5760   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5761   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5762   ierr = MatDestroy(&At);CHKERRQ(ierr);
5763   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5764   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5765   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5766   PetscFunctionReturn(0);
5767 }
5768 
5769 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5770 {
5771   PetscErrorCode ierr;
5772   PetscBool      cisdense;
5773 
5774   PetscFunctionBegin;
5775   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5776   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5777   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5778   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5779   if (!cisdense) {
5780     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5781   }
5782   ierr = MatSetUp(C);CHKERRQ(ierr);
5783 
5784   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5785   PetscFunctionReturn(0);
5786 }
5787 
5788 /* ----------------------------------------------------------------*/
5789 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5790 {
5791   Mat_Product *product = C->product;
5792   Mat         A = product->A,B=product->B;
5793 
5794   PetscFunctionBegin;
5795   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5796     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5797 
5798   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5799   C->ops->productsymbolic = MatProductSymbolic_AB;
5800   PetscFunctionReturn(0);
5801 }
5802 
5803 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5804 {
5805   PetscErrorCode ierr;
5806   Mat_Product    *product = C->product;
5807 
5808   PetscFunctionBegin;
5809   if (product->type == MATPRODUCT_AB) {
5810     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5811   }
5812   PetscFunctionReturn(0);
5813 }
5814 /* ----------------------------------------------------------------*/
5815 
5816 /*MC
5817    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5818 
5819    Options Database Keys:
5820 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5821 
5822    Level: beginner
5823 
5824    Notes:
5825     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5826     in this case the values associated with the rows and columns one passes in are set to zero
5827     in the matrix
5828 
5829     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5830     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
5831 
5832 .seealso: MatCreateAIJ()
5833 M*/
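/*
   Hedged usage sketch (illustrative addition): creating a parallel AIJ matrix of this type from scratch;
   m, n, and the preallocation counts below are placeholders chosen purely for illustration.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);                 or use -mat_type mpiaij with MatSetFromOptions()
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ...  MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/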
5834 
5835 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5836 {
5837   Mat_MPIAIJ     *b;
5838   PetscErrorCode ierr;
5839   PetscMPIInt    size;
5840 
5841   PetscFunctionBegin;
5842   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5843 
5844   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5845   B->data       = (void*)b;
5846   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5847   B->assembled  = PETSC_FALSE;
5848   B->insertmode = NOT_SET_VALUES;
5849   b->size       = size;
5850 
5851   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5852 
5853   /* build cache for off array entries formed */
5854   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5855 
5856   b->donotstash  = PETSC_FALSE;
5857   b->colmap      = 0;
5858   b->garray      = 0;
5859   b->roworiented = PETSC_TRUE;
5860 
5861   /* stuff used for matrix vector multiply */
5862   b->lvec  = NULL;
5863   b->Mvctx = NULL;
5864 
5865   /* stuff for MatGetRow() */
5866   b->rowindices   = 0;
5867   b->rowvalues    = 0;
5868   b->getrowactive = PETSC_FALSE;
5869 
5870   /* flexible pointer used in CUSP/CUSPARSE classes */
5871   b->spptr = NULL;
5872 
5873   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5874   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5875   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5876   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5877   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5878   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5879   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5880   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5881   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5882   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5883 #if defined(PETSC_HAVE_MKL_SPARSE)
5884   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5885 #endif
5886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5887   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5888   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5889 #if defined(PETSC_HAVE_ELEMENTAL)
5890   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5891 #endif
5892 #if defined(PETSC_HAVE_SCALAPACK)
5893   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
5894 #endif
5895   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5896   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5897 #if defined(PETSC_HAVE_HYPRE)
5898   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5899   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5900 #endif
5901   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5902   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5903   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5904   PetscFunctionReturn(0);
5905 }
5906 
5907 /*@C
5908      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5909          and "off-diagonal" part of the matrix in CSR format.
5910 
5911    Collective
5912 
5913    Input Parameters:
5914 +  comm - MPI communicator
5915 .  m - number of local rows (Cannot be PETSC_DECIDE)
5916 .  n - This value should be the same as the local size used in creating the
5917        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
5918        calculated if N is given) For square matrices n is almost always m.
5919 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5920 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5921 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5922 .   j - column indices
5923 .   a - matrix values
5924 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5925 .   oj - column indices
5926 -   oa - matrix values
5927 
5928    Output Parameter:
5929 .   mat - the matrix
5930 
5931    Level: advanced
5932 
5933    Notes:
5934        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5935        must free the arrays once the matrix has been destroyed and not before.
5936 
5937        The i and j indices are 0 based
5938 
5939        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5940 
5941        This sets local rows and cannot be used to set off-processor values.
5942 
5943        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5944        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5945        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5946        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5947        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5948        communication if it is known that only local entries will be set.
5949 
5950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5951           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5952 @*/
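/*
   Hedged usage sketch (illustrative addition): each rank supplies its own diagonal-block CSR arrays
   (i,j,a) and off-diagonal-block CSR arrays (oi,oj,oa), which it owns and must keep alive until the
   matrix has been destroyed; m and n are the local row and column sizes.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ...  use A ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     ...  only now is it safe to free i, j, a, oi, oj, and oa ...
*/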
5953 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5954 {
5955   PetscErrorCode ierr;
5956   Mat_MPIAIJ     *maij;
5957 
5958   PetscFunctionBegin;
5959   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5960   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5961   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5962   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5963   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5964   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5965   maij = (Mat_MPIAIJ*) (*mat)->data;
5966 
5967   (*mat)->preallocated = PETSC_TRUE;
5968 
5969   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5970   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5971 
5972   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5973   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5974 
5975   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5976   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5977   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5978   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5979 
5980   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5981   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5982   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5983   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5984   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5985   PetscFunctionReturn(0);
5986 }
5987 
5988 /*
5989     Special version for direct calls from Fortran
5990 */
5991 #include <petsc/private/fortranimpl.h>
5992 
5993 /* Change these macros so can be used in void function */
5994 #undef CHKERRQ
5995 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5996 #undef SETERRQ2
5997 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5998 #undef SETERRQ3
5999 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6000 #undef SETERRQ
6001 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6002 
6003 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6004 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6005 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6006 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6007 #else
6008 #endif
6009 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6010 {
6011   Mat            mat  = *mmat;
6012   PetscInt       m    = *mm, n = *mn;
6013   InsertMode     addv = *maddv;
6014   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6015   PetscScalar    value;
6016   PetscErrorCode ierr;
6017 
6018   MatCheckPreallocated(mat,1);
6019   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6020   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6021   {
6022     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6023     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6024     PetscBool roworiented = aij->roworiented;
6025 
6026     /* Some Variables required in the macro */
6027     Mat        A                    = aij->A;
6028     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6029     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6030     MatScalar  *aa                  = a->a;
6031     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6032     Mat        B                    = aij->B;
6033     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6034     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6035     MatScalar  *ba                  = b->a;
6036     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6037      * cannot use "#if defined" inside a macro. */
6038     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6039 
6040     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6041     PetscInt  nonew = a->nonew;
6042     MatScalar *ap1,*ap2;
6043 
6044     PetscFunctionBegin;
6045     for (i=0; i<m; i++) {
6046       if (im[i] < 0) continue;
6047       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6048       if (im[i] >= rstart && im[i] < rend) {
6049         row      = im[i] - rstart;
6050         lastcol1 = -1;
6051         rp1      = aj + ai[row];
6052         ap1      = aa + ai[row];
6053         rmax1    = aimax[row];
6054         nrow1    = ailen[row];
6055         low1     = 0;
6056         high1    = nrow1;
6057         lastcol2 = -1;
6058         rp2      = bj + bi[row];
6059         ap2      = ba + bi[row];
6060         rmax2    = bimax[row];
6061         nrow2    = bilen[row];
6062         low2     = 0;
6063         high2    = nrow2;
6064 
6065         for (j=0; j<n; j++) {
6066           if (roworiented) value = v[i*n+j];
6067           else value = v[i+j*m];
6068           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6069           if (in[j] >= cstart && in[j] < cend) {
6070             col = in[j] - cstart;
6071             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6072 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6073             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6074 #endif
6075           } else if (in[j] < 0) continue;
6076           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6077             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6078             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6079           } else {
6080             if (mat->was_assembled) {
6081               if (!aij->colmap) {
6082                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6083               }
6084 #if defined(PETSC_USE_CTABLE)
6085               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6086               col--;
6087 #else
6088               col = aij->colmap[in[j]] - 1;
6089 #endif
6090               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6091                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6092                 col  =  in[j];
6093                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6094                 B        = aij->B;
6095                 b        = (Mat_SeqAIJ*)B->data;
6096                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6097                 rp2      = bj + bi[row];
6098                 ap2      = ba + bi[row];
6099                 rmax2    = bimax[row];
6100                 nrow2    = bilen[row];
6101                 low2     = 0;
6102                 high2    = nrow2;
6103                 bm       = aij->B->rmap->n;
6104                 ba       = b->a;
6105                 inserted = PETSC_FALSE;
6106               }
6107             } else col = in[j];
6108             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6109 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6110             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6111 #endif
6112           }
6113         }
6114       } else if (!aij->donotstash) {
6115         if (roworiented) {
6116           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6117         } else {
6118           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6119         }
6120       }
6121     }
6122   }
6123   PetscFunctionReturnVoid();
6124 }
6125