xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5e28bcb6eb89755d887ff19df10fe7e2cb7942ae)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->pinnedtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* gmat is only meaningful on rank 0; only rank 0 can validate its type */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create the distributed matrix with m local rows/columns per process (square layout) */
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* block sizes are only valid on rank 0 where gmat lives; broadcast them */
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* build the row-ownership ranges: rowners[r]..rowners[r+1] are the rows of rank r */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts; ld[i] counts entries strictly left
         of this rank's diagonal block, needed later for MAT_REUSE_MATRIX value transfers */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 reads its own slice directly from gmat; no self-send needed */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts (same computation as on rank 0) */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, convert to diagonal-only */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash the left-of-diagonal counts on the new matrix for later MAT_REUSE_MATRIX calls */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat.
       Each received row is laid out as [left-of-diagonal | diagonal-block | right-of-diagonal];
       ld[i] gives the left-of-diagonal count so the row can be split between Ao and Ad. */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diagonal part of row i-1 plus left-of-diagonal part of row i are contiguous in the stream */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* trailing right-of-diagonal part of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each processor
has an order N integer array), but access is fast.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
/*
   MatSetValues_SeqAIJ_A_Private - inserts or adds a single value at local (row,col) of the
   diagonal block of an MPIAIJ matrix. (orow,ocol) are the original global indices, used
   only in error messages.

   Relies on many variables from the enclosing scope (rp1, ap1, nrow1, low1, high1,
   lastcol1, aimax, ailen, nonew, ignorezeroentries, inserted, ierr, a, A, ...);
   see MatSetValues_MPIAIJ() below for how they must be initialized before use.
   Performs a binary search (narrowed to a linear scan) within the row; falls through
   to MatSeqXAIJReallocateAIJ when a new nonzero must be inserted.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
485 
/*
   MatSetValues_SeqAIJ_B_Private - counterpart of MatSetValues_SeqAIJ_A_Private for the
   off-diagonal block of an MPIAIJ matrix; uses the "2"-suffixed search-state variables
   (rp2, ap2, nrow2, low2, high2, lastcol2, bimax, bilen, ...) from the enclosing scope.
   Unlike the A variant, zero values are dropped even on the diagonal (row == col cannot
   name a diagonal entry in the off-diagonal block).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
/*
   MatSetValues_MPIAIJ - inserts or adds an m-by-n logically dense block of values at
   global rows im[] and columns in[]. Locally owned rows are routed to the diagonal (A)
   or off-diagonal (B) sequential block via the MatSetValues_SeqAIJ_{A,B}_Private macros;
   off-process rows are stashed for communication at assembly time. Negative row or
   column indices are silently ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa                  = a->a;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba                  = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state consumed by the two macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        /* skip zero off-diagonal additions when the matrix was told to ignore them */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block: shift to local numbering and insert into A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal column */
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column numbering; translate via colmap */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* col < 0 means this global column is new to B; disassemble so B goes back
               to global column numbering and the entry can be inserted */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              /* new nonzero location but insertion of new nonzeros is disabled */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];  /* before first assembly B uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* off-process row: stash for delivery during assembly, unless forbidden */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
670 
671 /*
672     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
673     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
675 */
676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
677 {
678   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
679   Mat            A           = aij->A; /* diagonal part of the matrix */
680   Mat            B           = aij->B; /* offdiagonal part of the matrix */
681   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
682   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
683   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
684   PetscInt       *ailen      = a->ilen,*aj = a->j;
685   PetscInt       *bilen      = b->ilen,*bj = b->j;
686   PetscInt       am          = aij->A->rmap->n,j;
687   PetscInt       diag_so_far = 0,dnz;
688   PetscInt       offd_so_far = 0,onz;
689 
690   PetscFunctionBegin;
691   /* Iterate over all rows of the matrix */
692   for (j=0; j<am; j++) {
693     dnz = onz = 0;
694     /*  Iterate over all non-zero columns of the current row */
695     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
696       /* If column is in the diagonal */
697       if (mat_j[col] >= cstart && mat_j[col] < cend) {
698         aj[diag_so_far++] = mat_j[col] - cstart;
699         dnz++;
700       } else { /* off-diagonal entries */
701         bj[offd_so_far++] = mat_j[col];
702         onz++;
703       }
704     }
705     ailen[j] = dnz;
706     bilen[j] = onz;
707   }
708   PetscFunctionReturn(0);
709 }
710 
711 /*
712     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
713     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
714     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
715     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
716     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
717 */
718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
719 {
720   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
721   Mat            A      = aij->A; /* diagonal part of the matrix */
722   Mat            B      = aij->B; /* offdiagonal part of the matrix */
723   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
724   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
725   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
726   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
727   PetscInt       *ailen = a->ilen,*aj = a->j;
728   PetscInt       *bilen = b->ilen,*bj = b->j;
729   PetscInt       am     = aij->A->rmap->n,j;
730   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
731   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
732   PetscScalar    *aa = a->a,*ba = b->a;
733 
734   PetscFunctionBegin;
735   /* Iterate over all rows of the matrix */
736   for (j=0; j<am; j++) {
737     dnz_row = onz_row = 0;
738     rowstart_offd = full_offd_i[j];
739     rowstart_diag = full_diag_i[j];
740     /*  Iterate over all non-zero columns of the current row */
741     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
742       /* If column is in the diagonal */
743       if (mat_j[col] >= cstart && mat_j[col] < cend) {
744         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
745         aa[rowstart_diag+dnz_row] = mat_a[col];
746         dnz_row++;
747       } else { /* off-diagonal entries */
748         bj[rowstart_offd+onz_row] = mat_j[col];
749         ba[rowstart_offd+onz_row] = mat_a[col];
750         onz_row++;
751       }
752     }
753     ailen[j] = dnz_row;
754     bilen[j] = onz_row;
755   }
756   PetscFunctionReturn(0);
757 }
758 
/*
   MatGetValues_MPIAIJ - Retrieves an m-by-n logically dense block of values.

   Only locally owned rows may be queried (querying a remote row raises
   PETSC_ERR_SUP); negative row or column indices are silently skipped.
   Entries in owned columns are read from the diagonal block aij->A;
   other entries are looked up in the off-diagonal block aij->B through the
   column map and are returned as 0.0 when absent from its nonzero pattern.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows ignored; SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index within the owned blocks */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns ignored; SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* owned column: read from the diagonal block using local indices */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-process column: translate the global column index to the
             compressed local numbering of the off-diagonal block */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--; /* map stores indices shifted by one so that 0 means "absent" */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; /* column not in the nonzero pattern */
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
798 
799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
800 
801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   PetscErrorCode ierr;
805   PetscInt       nstash,reallocs;
806 
807   PetscFunctionBegin;
808   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
812   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash of entries sent
   by other ranks and inserts them locally, assembles the diagonal (A) and
   off-diagonal (B) blocks, handles the collective disassemble/reassemble cycle
   for B, and on the first final assembly builds the scatter used by
   matrix-vector products (MatSetUpMultiply_MPIAIJ).
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other ranks, one message at a time, until all are drained */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  /* stash insertion happened on the CPU, so mark the diagonal block's CPU copy as current */
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is false iff at least one rank has disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up lvec/Mvctx and compact B's column space */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* work arrays cached for MatGetRow are invalidated by assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* cached diagonal is stale after new values were inserted */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
/*
   MatZeroRows_MPIAIJ - Zeros the rows listed in rows[] (global indices),
   optionally placing diag on the diagonal and fixing the right-hand side b so
   that the solution retains the values given in x for those rows. Collective;
   finishes with a final assembly and a reduction to update the nonzero state.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;   /* nonzero states before zeroing, to detect pattern changes */
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    /* b_row = diag * x_row for every zeroed row */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry is guaranteed to live in the diagonal block */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;   /* saved 'nonew' flags, restored after the diagonal insertions */
    PetscBool  nnzA, nnzB;   /* keepnonzeropattern flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    /* insert the diagonal entries one by one (may create new nonzeros) */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rectangular matrix: row has no diagonal entry */
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
984 
/*
   MatZeroRowsColumns_MPIAIJ - Zeros the rows AND columns listed in rows[]
   (global indices, possibly owned by other ranks), optionally placing diag on
   the diagonal and adjusting the right-hand side b using the values in x so
   the remaining system stays consistent.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;   /* NOTE: reused below as a per-row nonzero count */
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  /* mark zeroed columns with 1 and scatter the marks into the ghost (lvec) layout */
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring the ghost values of x over so the rhs can be corrected below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* nonzero mask: this ghost column is being zeroed */
          if (b) bb[*ridx] -= *aa*xx[*aj]; /* move the known contribution to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1101 
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107   VecScatter     Mvctx = a->Mvctx;
1108 
1109   PetscFunctionBegin;
1110   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1111   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1112 
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to
   within tol. First compares the diagonal blocks (cheap; one reduction); only
   when that passes everywhere are the off-diagonal parts gathered with
   MatCreateSubMatrices and compared.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  /* all ranks must agree that their diagonal blocks pass */
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0); /* sequential: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside the local ownership range [first,last) */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  /* for Bmat the index sets are swapped so the two submatrices are transposes of each other when the test holds */
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1202 
/*
   MatIsSymmetric_MPIAIJ - A matrix is symmetric exactly when it equals its own
   transpose, so delegate to MatIsTranspose_MPIAIJ with both arguments set to A.
*/
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
/*
   MatDestroy_MPIAIJ - Releases everything owned by the MPIAIJ implementation:
   the value stash, the diagonal (A) and off-diagonal (B) blocks, the
   global-to-local column map, the ghost vector and scatters, work arrays, and
   finally unregisters all composed methods.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* clear every composed method so no stale function pointer can be invoked */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1301 
1302 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1303 {
1304   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1305   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1306   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1307   PetscErrorCode ierr;
1308   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1309   int            fd;
1310   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1311   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1312   PetscScalar    *column_values;
1313   PetscInt       message_count,flowcontrolcount;
1314   FILE           *file;
1315 
1316   PetscFunctionBegin;
1317   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1318   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1319   nz   = A->nz + B->nz;
1320   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1321   if (!rank) {
1322     header[0] = MAT_FILE_CLASSID;
1323     header[1] = mat->rmap->N;
1324     header[2] = mat->cmap->N;
1325 
1326     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1327     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1328     /* get largest number of rows any processor has */
1329     rlen  = mat->rmap->n;
1330     range = mat->rmap->range;
1331     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1332   } else {
1333     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1334     rlen = mat->rmap->n;
1335   }
1336 
1337   /* load up the local row counts */
1338   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1339   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1340 
1341   /* store the row lengths to the file */
1342   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1343   if (!rank) {
1344     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1345     for (i=1; i<size; i++) {
1346       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1347       rlen = range[i+1] - range[i];
1348       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1349       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1350     }
1351     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1352   } else {
1353     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1354     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1355     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1356   }
1357   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1358 
1359   /* load up the local column indices */
1360   nzmax = nz; /* th processor needs space a largest processor needs */
1361   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1363   cnt   = 0;
1364   for (i=0; i<mat->rmap->n; i++) {
1365     for (j=B->i[i]; j<B->i[i+1]; j++) {
1366       if ((col = garray[B->j[j]]) > cstart) break;
1367       column_indices[cnt++] = col;
1368     }
1369     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1370     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1371   }
1372   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1373 
1374   /* store the column indices to the file */
1375   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1376   if (!rank) {
1377     MPI_Status status;
1378     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1379     for (i=1; i<size; i++) {
1380       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1381       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1382       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1383       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1384       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1385     }
1386     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1387   } else {
1388     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1389     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1392   }
1393   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1394 
1395   /* load up the local column values */
1396   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1397   cnt  = 0;
1398   for (i=0; i<mat->rmap->n; i++) {
1399     for (j=B->i[i]; j<B->i[i+1]; j++) {
1400       if (garray[B->j[j]] > cstart) break;
1401       column_values[cnt++] = B->a[j];
1402     }
1403     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1404     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1405   }
1406   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1407 
1408   /* store the column values to the file */
1409   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1410   if (!rank) {
1411     MPI_Status status;
1412     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1413     for (i=1; i<size; i++) {
1414       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1415       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1416       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1417       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1418       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1419     }
1420     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1421   } else {
1422     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1423     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1424     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1426   }
1427   ierr = PetscFree(column_values);CHKERRQ(ierr);
1428 
1429   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1430   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #include <petscdraw.h>
/*
   Viewer backend for MPIAIJ matrices covering ASCII (several formats), binary,
   draw, and socket viewers.  Formats that can be reported from per-process
   data (load balance, info, info-detail, factor-info) return early; for
   everything else the whole matrix is gathered onto process 0 and viewed
   there as a sequential matrix.

   Collective: all ranks must call, even though only rank 0 prints/draws the
   gathered matrix, because MatCreateSubMatrix() and the draw synchronization
   are collective operations.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max nonzeros per process; nzlocal counts both the
         diagonal (A) and off-diagonal (B) sequential blocks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process synchronized report of local row counts, nonzeros,
         memory, and I-node usage, followed by the Mvctx scatter info */
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* single process: the diagonal block A is the whole matrix */
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable — when iascii is true the
       first branch of this if/else chain is taken instead */
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/columns, everyone else requests none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (!rank) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1563 
1564 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1565 {
1566   PetscErrorCode ierr;
1567   PetscBool      iascii,isdraw,issocket,isbinary;
1568 
1569   PetscFunctionBegin;
1570   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1571   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1572   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1573   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1574   if (iascii || isdraw || isbinary || issocket) {
1575     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
/*
   SOR/Eisenstat relaxation for MPIAIJ matrices.  Only "local" variants are
   supported: each outer iteration scatters the current solution into the
   ghost vector lvec, forms bb1 = bb - B*x (the rhs with the off-process
   coupling moved to the right-hand side), and then runs the sequential SOR
   of the diagonal block A on bb1.  A true parallel SOR is not implemented
   and raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application is delegated entirely to the diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is only needed when at least one full outer iteration runs.
     Note operator precedence: ~flag & SOR_ZERO_INITIAL_GUESS is
     (~flag) & SOR_ZERO_INITIAL_GUESS, i.e. true when the
     zero-initial-guess bit is NOT set. */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost values since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward half-sweep into xx */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* cache the diagonal on first use */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 = bb + ((omega-2)/omega) * D*x */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* propagate any zero-pivot style failure detected in the local sweep */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1679 
/*
   Apply a global row and column permutation to an MPIAIJ matrix, producing
   a new matrix B = P_r * A * P_c.  The permutations are given as index sets
   of "wanted" global indices; two PetscSF reductions invert them so each
   process learns the destination of its own rows/columns, a third SF
   broadcast translates the ghost-column map (garray), and the result is
   preallocated exactly and filled with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  /* reducing my global row numbers onto the wanted indices yields, in
     rdest[], the new global index of each of my rows */
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  /* broadcast cdest from the owners so gcdest[] maps each ghost column
     to its permuted global index */
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count, per source row, how many permuted entries land on the
     destination row's diagonal vs off-diagonal block (dnnz/onnz), then
     push those counts to the destination owners (tdnnz/tonnz). */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is never assigned non-NULL in this routine, so
     this destroy never fires — possibly a remnant of an earlier version */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1783 
1784 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1785 {
1786   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1787   PetscErrorCode ierr;
1788 
1789   PetscFunctionBegin;
1790   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1791   if (ghosts) *ghosts = aij->garray;
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1796 {
1797   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1798   Mat            A    = mat->A,B = mat->B;
1799   PetscErrorCode ierr;
1800   PetscLogDouble isend[5],irecv[5];
1801 
1802   PetscFunctionBegin;
1803   info->block_size = 1.0;
1804   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1805 
1806   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1807   isend[3] = info->memory;  isend[4] = info->mallocs;
1808 
1809   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1810 
1811   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1812   isend[3] += info->memory;  isend[4] += info->mallocs;
1813   if (flag == MAT_LOCAL) {
1814     info->nz_used      = isend[0];
1815     info->nz_allocated = isend[1];
1816     info->nz_unneeded  = isend[2];
1817     info->memory       = isend[3];
1818     info->mallocs      = isend[4];
1819   } else if (flag == MAT_GLOBAL_MAX) {
1820     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1821 
1822     info->nz_used      = irecv[0];
1823     info->nz_allocated = irecv[1];
1824     info->nz_unneeded  = irecv[2];
1825     info->memory       = irecv[3];
1826     info->mallocs      = irecv[4];
1827   } else if (flag == MAT_GLOBAL_SUM) {
1828     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1829 
1830     info->nz_used      = irecv[0];
1831     info->nz_allocated = irecv[1];
1832     info->nz_unneeded  = irecv[2];
1833     info->memory       = irecv[3];
1834     info->mallocs      = irecv[4];
1835   }
1836   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1837   info->fill_ratio_needed = 0;
1838   info->factor_mallocs    = 0;
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1843 {
1844   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1845   PetscErrorCode ierr;
1846 
1847   PetscFunctionBegin;
1848   switch (op) {
1849   case MAT_NEW_NONZERO_LOCATIONS:
1850   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1851   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1852   case MAT_KEEP_NONZERO_PATTERN:
1853   case MAT_NEW_NONZERO_LOCATION_ERR:
1854   case MAT_USE_INODES:
1855   case MAT_IGNORE_ZERO_ENTRIES:
1856     MatCheckPreallocated(A,1);
1857     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1858     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1859     break;
1860   case MAT_ROW_ORIENTED:
1861     MatCheckPreallocated(A,1);
1862     a->roworiented = flg;
1863 
1864     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1865     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1866     break;
1867   case MAT_NEW_DIAGONALS:
1868   case MAT_SORTED_FULL:
1869     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1870     break;
1871   case MAT_IGNORE_OFF_PROC_ENTRIES:
1872     a->donotstash = flg;
1873     break;
1874   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1875   case MAT_SPD:
1876   case MAT_SYMMETRIC:
1877   case MAT_STRUCTURALLY_SYMMETRIC:
1878   case MAT_HERMITIAN:
1879   case MAT_SYMMETRY_ETERNAL:
1880     break;
1881   case MAT_SUBMAT_SINGLEIS:
1882     A->submat_singleis = flg;
1883     break;
1884   case MAT_STRUCTURE_ONLY:
1885     /* The option is handled directly by MatSetOption() */
1886     break;
1887   default:
1888     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
/*
   Return one locally-owned row of an MPIAIJ matrix with global column
   indices, merging the pieces held in the diagonal block A (columns in
   [cstart,cend)) and the off-diagonal block B (translated through the
   garray map).  The merged row is returned sorted by global column under
   the assumption that the rows of A and B are themselves sorted; B's
   entries with global column < cstart come first, then all of A, then the
   rest of B.  Results are copied into work arrays sized once for the
   longest local row; MatRestoreRow_MPIAIJ() must be called before the
   next MatGetRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request from A/B the pieces the caller actually wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes cstart */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's local columns are shifted to global by cstart */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1970 
1971 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1972 {
1973   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1974 
1975   PetscFunctionBegin;
1976   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1977   aij->getrowactive = PETSC_FALSE;
1978   PetscFunctionReturn(0);
1979 }
1980 
/*
   Compute a matrix norm of an MPIAIJ matrix by combining the raw CSR data
   of the diagonal (A) and off-diagonal (B) sequential blocks, then reducing
   across the communicator:

     NORM_FROBENIUS - sum of |a_ij|^2 locally, global sum, square root
     NORM_1         - per-(global)-column absolute sums, global sum, max
     NORM_INFINITY  - per-row absolute sums, local max, global max

   NORM_2 is not supported.  On a single process the work is delegated to
   the sequential block directly.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* walk the value arrays of both blocks, accumulating |a_ij|^2 */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* tmp holds this process's contribution to every global column sum;
         A's local column indices are shifted by cstart, B's translated
         through garray */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        /* absolute row sum = contribution of A's row j plus B's row j */
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
2047 
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   reuse == MAT_INITIAL_MATRIX (or *matout == A, the in-place case): a new
   matrix B with the transposed layout is created and preallocated from the
   transposed nonzero counts; otherwise *matout is reused and must already
   have the nonzero pattern of the transpose.  For MAT_INPLACE_MATRIX the
   result replaces A itself via MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    /* each nonzero in column j of the local diagonal block becomes a nonzero in row j of B */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global: reduce the per-(compressed-)column counts onto the
       processes that own the corresponding rows of the transpose */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* B has the transposed sizes and block sizes of A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    /* reused B must already have the transpose's pattern; flag accidental new allocations */
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  /* Off-diagonal entries must be communicated: insert each local row of a->B as a
     (global) column of B via MatSetValues, with column indices mapped through garray. */
  ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* note the transposed orientation: ncol rows, one column */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B's and destroy the shell of B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2136 
2137 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2138 {
2139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2140   Mat            a    = aij->A,b = aij->B;
2141   PetscErrorCode ierr;
2142   PetscInt       s1,s2,s3;
2143 
2144   PetscFunctionBegin;
2145   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2146   if (rr) {
2147     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2148     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2149     /* Overlap communication with computation. */
2150     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2151   }
2152   if (ll) {
2153     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2154     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2155     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2156   }
2157   /* scale  the diagonal block */
2158   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2159 
2160   if (rr) {
2161     /* Do a scatter end and then right scale the off-diagonal block */
2162     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2163     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2169 {
2170   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2171   PetscErrorCode ierr;
2172 
2173   PetscFunctionBegin;
2174   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2179 {
2180   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2181   Mat            a,b,c,d;
2182   PetscBool      flg;
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   a = matA->A; b = matA->B;
2187   c = matB->A; d = matB->B;
2188 
2189   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2190   if (flg) {
2191     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2192   }
2193   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2201   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2202 
2203   PetscFunctionBegin;
2204   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2205   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2206     /* because of the column compression in the off-processor part of the matrix a->B,
2207        the number of columns in a->B and b->B may be different, hence we cannot call
2208        the MatCopy() directly on the two parts. If need be, we can provide a more
2209        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2210        then copying the submatrices */
2211     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2212   } else {
2213     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2214     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2215   }
2216   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2221 {
2222   PetscErrorCode ierr;
2223 
2224   PetscFunctionBegin;
2225   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 /*
2230    Computes the number of nonzeros per row needed for preallocation when X and Y
2231    have different nonzero structure.
2232 */
2233 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2234 {
2235   PetscInt       i,j,k,nzx,nzy;
2236 
2237   PetscFunctionBegin;
2238   /* Set the number of nonzeros in the new matrix */
2239   for (i=0; i<m; i++) {
2240     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2241     nzx = xi[i+1] - xi[i];
2242     nzy = yi[i+1] - yi[i];
2243     nnz[i] = 0;
2244     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2245       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2246       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2247       nnz[i]++;
2248     }
2249     for (; k<nzy; k++) nnz[i]++;
2250   }
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2255 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2256 {
2257   PetscErrorCode ierr;
2258   PetscInt       m = Y->rmap->N;
2259   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2260   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2261 
2262   PetscFunctionBegin;
2263   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN: a direct BLAS axpy on the stored values of both blocks.
   SUBSET_NONZERO_PATTERN: falls back to MatAXPY_Basic() (MatSetValues-based).
   Otherwise: a new matrix with the union pattern is preallocated, filled, and
   swapped into Y via MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* axpy on the diagonal blocks' value arrays */
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* axpy on the off-diagonal blocks' value arrays */
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
    /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
       will be updated */
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
      Y->offloadmask = PETSC_OFFLOAD_CPU;
    }
#endif
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* unknown/different patterns: build a matrix with the union pattern, then replace Y */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2315 
2316 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2317 
/* Replaces every entry of the matrix by its complex conjugate; a no-op for real scalars. */
PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  /* conjugate both the diagonal and off-diagonal blocks in place */
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
2332 
2333 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2334 {
2335   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2336   PetscErrorCode ierr;
2337 
2338   PetscFunctionBegin;
2339   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2340   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2345 {
2346   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2347   PetscErrorCode ierr;
2348 
2349   PetscFunctionBegin;
2350   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2351   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2352   PetscFunctionReturn(0);
2353 }
2354 
/*
   MatGetRowMaxAbs_MPIAIJ - For each local row, v[i] receives the entry of
   largest absolute value and, when idx is non-NULL, idx[i] receives that
   entry's global column index.
*/
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* row-wise max over the diagonal block; idx holds local column indices here */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift the diagonal block's local column indices to global numbering */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* compute the row-wise max for the off-diagonal block into a scratch vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* keep whichever block attains the larger magnitude; map off-diagonal indices
     through garray to global column numbers */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2392 
2393 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2396   PetscErrorCode ierr;
2397   PetscInt       i,*idxb = 0;
2398   PetscScalar    *va,*vb;
2399   Vec            vtmp;
2400 
2401   PetscFunctionBegin;
2402   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2403   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2404   if (idx) {
2405     for (i=0; i<A->cmap->n; i++) {
2406       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2407     }
2408   }
2409 
2410   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2411   if (idx) {
2412     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2413   }
2414   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2415   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2416 
2417   for (i=0; i<A->rmap->n; i++) {
2418     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2419       va[i] = vb[i];
2420       if (idx) idx[i] = a->garray[idxb[i]];
2421     }
2422   }
2423 
2424   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2426   ierr = PetscFree(idxb);CHKERRQ(ierr);
2427   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2432 {
2433   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2434   PetscInt       n      = A->rmap->n;
2435   PetscInt       cstart = A->cmap->rstart;
2436   PetscInt       *cmap  = mat->garray;
2437   PetscInt       *diagIdx, *offdiagIdx;
2438   Vec            diagV, offdiagV;
2439   PetscScalar    *a, *diagA, *offdiagA;
2440   PetscInt       r;
2441   PetscErrorCode ierr;
2442 
2443   PetscFunctionBegin;
2444   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2447   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2448   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2450   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2451   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2452   for (r = 0; r < n; ++r) {
2453     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2454       a[r]   = diagA[r];
2455       idx[r] = cstart + diagIdx[r];
2456     } else {
2457       a[r]   = offdiagA[r];
2458       idx[r] = cmap[offdiagIdx[r]];
2459     }
2460   }
2461   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2464   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2465   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2466   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2471 {
2472   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2473   PetscInt       n      = A->rmap->n;
2474   PetscInt       cstart = A->cmap->rstart;
2475   PetscInt       *cmap  = mat->garray;
2476   PetscInt       *diagIdx, *offdiagIdx;
2477   Vec            diagV, offdiagV;
2478   PetscScalar    *a, *diagA, *offdiagA;
2479   PetscInt       r;
2480   PetscErrorCode ierr;
2481 
2482   PetscFunctionBegin;
2483   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2484   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2485   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2486   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2487   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2488   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2489   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2490   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2491   for (r = 0; r < n; ++r) {
2492     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2493       a[r]   = diagA[r];
2494       idx[r] = cstart + diagIdx[r];
2495     } else {
2496       a[r]   = offdiagA[r];
2497       idx[r] = cmap[offdiagIdx[r]];
2498     }
2499   }
2500   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2501   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2502   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2503   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2504   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2505   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2510 {
2511   PetscErrorCode ierr;
2512   Mat            *dummy;
2513 
2514   PetscFunctionBegin;
2515   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2516   *newmat = *dummy;
2517   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2522 {
2523   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2524   PetscErrorCode ierr;
2525 
2526   PetscFunctionBegin;
2527   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2528   A->factorerrortype = a->A->factorerrortype;
2529   PetscFunctionReturn(0);
2530 }
2531 
2532 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2533 {
2534   PetscErrorCode ierr;
2535   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2536 
2537   PetscFunctionBegin;
2538   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2539   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2540   if (x->assembled) {
2541     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2542   } else {
2543     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2544   }
2545   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2546   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2551 {
2552   PetscFunctionBegin;
2553   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2554   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 /*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2560 
2561    Collective on Mat
2562 
2563    Input Parameters:
2564 +    A - the matrix
2565 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2566 
2567  Level: advanced
2568 
2569 @*/
2570 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2571 {
2572   PetscErrorCode       ierr;
2573 
2574   PetscFunctionBegin;
2575   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2580 {
2581   PetscErrorCode       ierr;
2582   PetscBool            sc = PETSC_FALSE,flg;
2583 
2584   PetscFunctionBegin;
2585   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2586   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2587   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2588   if (flg) {
2589     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2590   }
2591   ierr = PetscOptionsTail();CHKERRQ(ierr);
2592   PetscFunctionReturn(0);
2593 }
2594 
/* Computes Y = Y + a*I, preallocating for the diagonal when necessary. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* the shift touches only the diagonal: one nonzero per row in the diagonal block suffices */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* diagonal block preallocated but empty: re-preallocate it for the diagonal,
       preserving the user's new-nonzero error option (re-preallocation resets it) */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2612 
2613 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2614 {
2615   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2616   PetscErrorCode ierr;
2617 
2618   PetscFunctionBegin;
2619   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2620   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2621   if (d) {
2622     PetscInt rstart;
2623     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2624     *d += rstart;
2625 
2626   }
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2631 {
2632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2633   PetscErrorCode ierr;
2634 
2635   PetscFunctionBegin;
2636   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 /* -------------------------------------------------------------------*/
/*
   Virtual function table for MATMPIAIJ. Slot order is fixed by struct _MatOps
   (see petsc/private/matimpl.h); the numbered comments mark every fifth slot.
   A 0 entry means the operation is not implemented for this type.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       MatPinToCPU_MPIAIJ,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};
2787 
2788 /* ----------------------------------------------------------------------------------------*/
2789 
2790 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2791 {
2792   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2793   PetscErrorCode ierr;
2794 
2795   PetscFunctionBegin;
2796   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2797   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2802 {
2803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2808   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2809   PetscFunctionReturn(0);
2810 }
2811 
/*
   MatMPIAIJSetPreallocation_MPIAIJ - Type-specific implementation of
   MatMPIAIJSetPreallocation(): (re)creates and preallocates the local
   diagonal block b->A and off-diagonal block b->B.

   d_nz/d_nnz give nonzeros per row of the diagonal block; o_nz/o_nnz give
   nonzeros per row of the off-diagonal block. Safe to call repeatedly: all
   assembly-derived state (colmap, garray, lvec, Mvctx, and b->B itself) is
   discarded first.
*/
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* discard the global-to-local column map built during a previous assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* uniprocess case: the off-diagonal block is empty (zero columns) */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* the diagonal block keeps its sizes across calls, so create it only once */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2856 
2857 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2858 {
2859   Mat_MPIAIJ     *b;
2860   PetscErrorCode ierr;
2861 
2862   PetscFunctionBegin;
2863   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2864   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2865   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2866   b = (Mat_MPIAIJ*)B->data;
2867 
2868 #if defined(PETSC_USE_CTABLE)
2869   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2870 #else
2871   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2872 #endif
2873   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2874   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2875   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2876 
2877   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2878   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2879   B->preallocated  = PETSC_TRUE;
2880   B->was_assembled = PETSC_FALSE;
2881   B->assembled = PETSC_FALSE;
2882   PetscFunctionReturn(0);
2883 }
2884 
/*
   Duplicates an MPIAIJ matrix: creates a new parallel matrix with the same
   layout and type as matin and duplicates its diagonal/off-diagonal blocks
   (values copied or not according to cpvalues).  matin only needs to be
   preallocated, not assembled (see the note on lvec/Mvctx below).
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call row work space is not copied; it is rebuilt on demand */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* share the layouts by reference instead of copying them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* deep-copy the global-to-local column map of the off-diagonal block, if built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* copy the compacted global column indices of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = 0;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx_mpi1) {
    ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  /* duplicate the sequential diagonal (A) and off-diagonal (B) blocks */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2956 
2957 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2958 {
2959   PetscBool      isbinary, ishdf5;
2960   PetscErrorCode ierr;
2961 
2962   PetscFunctionBegin;
2963   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2964   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2965   /* force binary viewer to load .info file if it has not yet done so */
2966   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2967   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2968   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2969   if (isbinary) {
2970     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2971   } else if (ishdf5) {
2972 #if defined(PETSC_HAVE_HDF5)
2973     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2974 #else
2975     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2976 #endif
2977   } else {
2978     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2979   }
2980   PetscFunctionReturn(0);
2981 }
2982 
/*
   Reads an MPIAIJ matrix from a PETSc binary viewer.

   Process 0 reads the file header, the row lengths, the column indices and
   the numerical values, and ships each process its portion with
   MPIULong_Send(); all other processes only receive (MPIULong_Recv).  Row
   (and, for rectangular matrices, column) ownership is computed here, the
   matrix is preallocated exactly from the counted diagonal/off-diagonal
   lengths, and values are inserted row by row before final assembly.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    /* header = [classid, #rows, #cols, nonzero flag]; only rank 0 touches the file */
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  /* every process needs the global sizes read on rank 0 */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1;             /* unused, but compilers complain */

  /* turn per-process counts into a prefix sum: rowners[p] = first row of process p */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    /* prefix sum of local column counts gives this process's column range */
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* split total row lengths into diagonal part (ourlens) and off-diagonal part (offlens) */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore total row lengths, needed for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3182 
3183 /* Not scalable because of ISAllGather() unless getting all columns. */
3184 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3185 {
3186   PetscErrorCode ierr;
3187   IS             iscol_local;
3188   PetscBool      isstride;
3189   PetscMPIInt    lisstride=0,gisstride;
3190 
3191   PetscFunctionBegin;
3192   /* check if we are grabbing all columns*/
3193   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3194 
3195   if (isstride) {
3196     PetscInt  start,len,mstart,mlen;
3197     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3198     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3199     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3200     if (mstart == start && mlen-mstart == len) lisstride = 1;
3201   }
3202 
3203   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3204   if (gisstride) {
3205     PetscInt N;
3206     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3207     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3208     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3209     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3210   } else {
3211     PetscInt cbs;
3212     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3213     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3214     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3215   }
3216 
3217   *isseq = iscol_local;
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 /*
3222  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3223  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3224 
3225  Input Parameters:
3226    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3229    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3230            i.e., mat->cstart <= iscol[i] < mat->cend
3231  Output Parameter:
3232    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3233    iscol_o - sequential column index set for retrieving mat->B
3234    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3235  */
/*
   Splits iscol into local index sets for the diagonal and off-diagonal
   blocks using a marker-vector trick: a full-length vector x holds the
   global column index at each selected column and -1 elsewhere, and a
   parallel companion vector cmap holds the column's position inside the
   submatrix; scattering both with a->Mvctx reveals which ghost columns of B
   are selected on this process.
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  /* prefix sum of local sizes gives this process's offset inside the global iscol */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  /* idx ownership transfers to the IS (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    /* value > -1 marks a ghost column that was selected by some process */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* caller takes ownership of cmap1 and must PetscFree() it */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3333 
3334 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3335 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3336 {
3337   PetscErrorCode ierr;
3338   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3339   Mat            M = NULL;
3340   MPI_Comm       comm;
3341   IS             iscol_d,isrow_d,iscol_o;
3342   Mat            Asub = NULL,Bsub = NULL;
3343   PetscInt       n;
3344 
3345   PetscFunctionBegin;
3346   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3347 
3348   if (call == MAT_REUSE_MATRIX) {
3349     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3350     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3351     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3352 
3353     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3354     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3355 
3356     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3357     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3358 
3359     /* Update diagonal and off-diagonal portions of submat */
3360     asub = (Mat_MPIAIJ*)(*submat)->data;
3361     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3362     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3363     if (n) {
3364       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3365     }
3366     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3367     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3368 
3369   } else { /* call == MAT_INITIAL_MATRIX) */
3370     const PetscInt *garray;
3371     PetscInt        BsubN;
3372 
3373     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3374     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3375 
3376     /* Create local submatrices Asub and Bsub */
3377     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3378     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3379 
3380     /* Create submatrix M */
3381     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3382 
3383     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3384     asub = (Mat_MPIAIJ*)M->data;
3385 
3386     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3387     n = asub->B->cmap->N;
3388     if (BsubN > n) {
3389       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3390       const PetscInt *idx;
3391       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3392       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3393 
3394       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3395       j = 0;
3396       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3397       for (i=0; i<n; i++) {
3398         if (j >= BsubN) break;
3399         while (subgarray[i] > garray[j]) j++;
3400 
3401         if (subgarray[i] == garray[j]) {
3402           idx_new[i] = idx[j++];
3403         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3404       }
3405       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3406 
3407       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3408       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3409 
3410     } else if (BsubN < n) {
3411       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3412     }
3413 
3414     ierr = PetscFree(garray);CHKERRQ(ierr);
3415     *submat = M;
3416 
3417     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3418     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3419     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3420 
3421     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3422     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3423 
3424     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3425     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3426   }
3427   PetscFunctionReturn(0);
3428 }
3429 
3430 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3431 {
3432   PetscErrorCode ierr;
3433   IS             iscol_local=NULL,isrow_d;
3434   PetscInt       csize;
3435   PetscInt       n,i,j,start,end;
3436   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3437   MPI_Comm       comm;
3438 
3439   PetscFunctionBegin;
3440   /* If isrow has same processor distribution as mat,
3441      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3442   if (call == MAT_REUSE_MATRIX) {
3443     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3444     if (isrow_d) {
3445       sameRowDist  = PETSC_TRUE;
3446       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3447     } else {
3448       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3449       if (iscol_local) {
3450         sameRowDist  = PETSC_TRUE;
3451         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3452       }
3453     }
3454   } else {
3455     /* Check if isrow has same processor distribution as mat */
3456     sameDist[0] = PETSC_FALSE;
3457     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3458     if (!n) {
3459       sameDist[0] = PETSC_TRUE;
3460     } else {
3461       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3462       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3463       if (i >= start && j < end) {
3464         sameDist[0] = PETSC_TRUE;
3465       }
3466     }
3467 
3468     /* Check if iscol has same processor distribution as mat */
3469     sameDist[1] = PETSC_FALSE;
3470     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3471     if (!n) {
3472       sameDist[1] = PETSC_TRUE;
3473     } else {
3474       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3475       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3476       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3477     }
3478 
3479     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3480     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3481     sameRowDist = tsameDist[0];
3482   }
3483 
3484   if (sameRowDist) {
3485     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3486       /* isrow and iscol have same processor distribution as mat */
3487       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3488       PetscFunctionReturn(0);
3489     } else { /* sameRowDist */
3490       /* isrow has same processor distribution as mat */
3491       if (call == MAT_INITIAL_MATRIX) {
3492         PetscBool sorted;
3493         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3494         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3495         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3496         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3497 
3498         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3499         if (sorted) {
3500           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3501           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3502           PetscFunctionReturn(0);
3503         }
3504       } else { /* call == MAT_REUSE_MATRIX */
3505         IS    iscol_sub;
3506         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3507         if (iscol_sub) {
3508           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3509           PetscFunctionReturn(0);
3510         }
3511       }
3512     }
3513   }
3514 
3515   /* General case: iscol -> iscol_local which has global size of iscol */
3516   if (call == MAT_REUSE_MATRIX) {
3517     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3518     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3519   } else {
3520     if (!iscol_local) {
3521       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3522     }
3523   }
3524 
3525   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3526   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3527 
3528   if (call == MAT_INITIAL_MATRIX) {
3529     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3530     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3531   }
3532   PetscFunctionReturn(0);
3533 }
3534 
3535 /*@C
3536      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3537          and "off-diagonal" part of the matrix in CSR format.
3538 
3539    Collective
3540 
3541    Input Parameters:
3542 +  comm - MPI communicator
3543 .  A - "diagonal" portion of matrix
3544 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3545 -  garray - global index of B columns
3546 
   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3550 
3551    Notes:
3552        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3553        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3554 
3555 .seealso: MatCreateMPIAIJWithSplitArrays()
3556 @*/
3557 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3558 {
3559   PetscErrorCode ierr;
3560   Mat_MPIAIJ     *maij;
3561   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3562   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3563   PetscScalar    *oa=b->a;
3564   Mat            Bnew;
3565   PetscInt       m,n,N;
3566 
3567   PetscFunctionBegin;
3568   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3569   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3570   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3571   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3572   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3573   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3574 
3575   /* Get global columns of mat */
3576   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3577 
3578   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3579   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3580   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3581   maij = (Mat_MPIAIJ*)(*mat)->data;
3582 
3583   (*mat)->preallocated = PETSC_TRUE;
3584 
3585   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3586   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3587 
3588   /* Set A as diagonal portion of *mat */
3589   maij->A = A;
3590 
3591   nz = oi[m];
3592   for (i=0; i<nz; i++) {
3593     col   = oj[i];
3594     oj[i] = garray[col];
3595   }
3596 
3597    /* Set Bnew as off-diagonal portion of *mat */
3598   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3599   bnew        = (Mat_SeqAIJ*)Bnew->data;
3600   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3601   maij->B     = Bnew;
3602 
3603   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3604 
3605   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3606   b->free_a       = PETSC_FALSE;
3607   b->free_ij      = PETSC_FALSE;
3608   ierr = MatDestroy(&B);CHKERRQ(ierr);
3609 
3610   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3611   bnew->free_a       = PETSC_TRUE;
3612   bnew->free_ij      = PETSC_TRUE;
3613 
3614   /* condense columns of maij->B */
3615   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3616   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3617   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3619   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3620   PetscFunctionReturn(0);
3621 }
3622 
3623 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3624 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts mat[isrow,iscol] into newmat, where the
   result keeps the same row distribution across processes as mat.

   Input Parameters:
+  mat         - the parallel MPIAIJ matrix
.  isrow       - rows to extract (same processor distribution as mat)
.  iscol       - columns to extract
.  iscol_local - sequential IS holding the column indices of iscol gathered on this process;
                 must be sorted (may contain duplicates); ignored (queried from *newmat
                 instead) when call == MAT_REUSE_MATRIX
-  call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  newmat - the extracted submatrix

   Notes:
   For MAT_INITIAL_MATRIX the intermediate objects are composed on *newmat under the names
   "SubMatrix" (sequential submatrix Msub), "SubIScol" (iscol_sub) and "Subcmap" (iscmap),
   so that a subsequent MAT_REUSE_MATRIX call can retrieve and refill them.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the objects cached on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    /* Refill the numerical values of the cached sequential submatrix */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      /* submatrix columns == matrix columns; the column map is the identity */
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* merge-scan iscol_local against [cstart,cend) and the sorted garray,
         keeping only the columns this process actually stores */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns evenly; the first (Ncols % size) ranks get one extra */
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to obtain this rank's diagonal column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens is the second half of the single dlens allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* map Msub's local column numbers to global columns of *newmat via cmap */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
3833 
/*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  on each process, and then the end result by concatenating those local
  matrices. Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol with all indices.
*/
/*
   MatCreateSubMatrix_MPIAIJ_nonscalable - extracts mat[isrow,iscol] into newmat by first
   building a sequential submatrix on every process and then assembling the parallel result.

   Input Parameters:
+  mat   - the parallel MPIAIJ matrix
.  isrow - rows to extract
.  iscol - sequential IS with ALL requested column indices (hence "nonscalable")
.  csize - number of local columns of newmat, or PETSC_DECIDE
-  call  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  newmat - the extracted submatrix

   Notes:
   For MAT_INITIAL_MATRIX the intermediate sequential matrix is composed on newmat under
   the name "SubMatrix" so a later MAT_REUSE_MATRIX call can retrieve and refill it.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;

  if (call ==  MAT_REUSE_MATRIX) {
    /* Retrieve and refill the sequential submatrix cached on *newmat */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns evenly; the first (n % size) ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to obtain this rank's diagonal column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens is the second half of the single dlens allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* Copy the rows of the local submatrix into the parallel matrix row by row */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3962 
3963 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3964 {
3965   PetscInt       m,cstart, cend,j,nnz,i,d;
3966   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3967   const PetscInt *JJ;
3968   PetscErrorCode ierr;
3969   PetscBool      nooffprocentries;
3970 
3971   PetscFunctionBegin;
3972   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3973 
3974   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3975   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3976   m      = B->rmap->n;
3977   cstart = B->cmap->rstart;
3978   cend   = B->cmap->rend;
3979   rstart = B->rmap->rstart;
3980 
3981   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3982 
3983 #if defined(PETSC_USE_DEBUG)
3984   for (i=0; i<m; i++) {
3985     nnz = Ii[i+1]- Ii[i];
3986     JJ  = J + Ii[i];
3987     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3988     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3989     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3990   }
3991 #endif
3992 
3993   for (i=0; i<m; i++) {
3994     nnz     = Ii[i+1]- Ii[i];
3995     JJ      = J + Ii[i];
3996     nnz_max = PetscMax(nnz_max,nnz);
3997     d       = 0;
3998     for (j=0; j<nnz; j++) {
3999       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4000     }
4001     d_nnz[i] = d;
4002     o_nnz[i] = nnz - d;
4003   }
4004   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4005   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4006 
4007   for (i=0; i<m; i++) {
4008     ii   = i + rstart;
4009     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4010   }
4011   nooffprocentries    = B->nooffprocentries;
4012   B->nooffprocentries = PETSC_TRUE;
4013   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4014   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4015   B->nooffprocentries = nooffprocentries;
4016 
4017   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4018   PetscFunctionReturn(0);
4019 }
4020 
4021 /*@
4022    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4023    (the default parallel PETSc format).
4024 
4025    Collective
4026 
4027    Input Parameters:
4028 +  B - the matrix
4029 .  i - the indices into j for the start of each local row (starts with zero)
4030 .  j - the column indices for each local row (starts with zero)
4031 -  v - optional values in the matrix
4032 
4033    Level: developer
4034 
4035    Notes:
4036        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4037      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4038      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4039 
4040        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4041 
4042        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
4044     as shown
4045 
4046 $        1 0 0
4047 $        2 0 3     P0
4048 $       -------
4049 $        4 5 6     P1
4050 $
4051 $     Process0 [P0]: rows_owned=[0,1]
4052 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4053 $        j =  {0,0,2}  [size = 3]
4054 $        v =  {1,2,3}  [size = 3]
4055 $
4056 $     Process1 [P1]: rows_owned=[2]
4057 $        i =  {0,3}    [size = nrow+1  = 1+1]
4058 $        j =  {0,1,2}  [size = 3]
4059 $        v =  {4,5,6}  [size = 3]
4060 
4061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4062           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4063 @*/
4064 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4065 {
4066   PetscErrorCode ierr;
4067 
4068   PetscFunctionBegin;
4069   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4070   PetscFunctionReturn(0);
4071 }
4072 
4073 /*@C
4074    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4075    (the default parallel PETSc format).  For good matrix assembly performance
4076    the user should preallocate the matrix storage by setting the parameters
4077    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4078    performance can be increased by more than a factor of 50.
4079 
4080    Collective
4081 
4082    Input Parameters:
4083 +  B - the matrix
4084 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4085            (same value is used for all local rows)
4086 .  d_nnz - array containing the number of nonzeros in the various rows of the
4087            DIAGONAL portion of the local submatrix (possibly different for each row)
4088            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4089            The size of this array is equal to the number of local rows, i.e 'm'.
4090            For matrices that will be factored, you must leave room for (and set)
4091            the diagonal entry even if it is zero.
4092 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4093            submatrix (same value is used for all local rows).
4094 -  o_nnz - array containing the number of nonzeros in the various rows of the
4095            OFF-DIAGONAL portion of the local submatrix (possibly different for
4096            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4097            structure. The size of this array is equal to the number
4098            of local rows, i.e 'm'.
4099 
4100    If the *_nnz parameter is given then the *_nz parameter is ignored
4101 
4102    The AIJ format (also called the Yale sparse matrix format or
4103    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4104    storage.  The stored row and column indices begin with zero.
4105    See Users-Manual: ch_mat for details.
4106 
4107    The parallel matrix is partitioned such that the first m0 rows belong to
4108    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4109    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4110 
4111    The DIAGONAL portion of the local submatrix of a processor can be defined
4112    as the submatrix which is obtained by extraction the part corresponding to
4113    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4114    first row that belongs to the processor, r2 is the last row belonging to
4115    the this processor, and c1-c2 is range of indices of the local part of a
4116    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4117    common case of a square matrix, the row and column ranges are the same and
4118    the DIAGONAL part is also square. The remaining portion of the local
4119    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4120 
4121    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4122 
4123    You can call MatGetInfo() to get information on how effective the preallocation was;
4124    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4125    You can also run with the option -info and look for messages with the string
4126    malloc in them to see if additional memory allocation was needed.
4127 
4128    Example usage:
4129 
4130    Consider the following 8x8 matrix with 34 non-zero values, that is
4131    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4132    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4133    as follows:
4134 
4135 .vb
4136             1  2  0  |  0  3  0  |  0  4
4137     Proc0   0  5  6  |  7  0  0  |  8  0
4138             9  0 10  | 11  0  0  | 12  0
4139     -------------------------------------
4140            13  0 14  | 15 16 17  |  0  0
4141     Proc1   0 18  0  | 19 20 21  |  0  0
4142             0  0  0  | 22 23  0  | 24  0
4143     -------------------------------------
4144     Proc2  25 26 27  |  0  0 28  | 29  0
4145            30  0  0  | 31 32 33  |  0 34
4146 .ve
4147 
4148    This can be represented as a collection of submatrices as:
4149 
4150 .vb
4151       A B C
4152       D E F
4153       G H I
4154 .ve
4155 
4156    Where the submatrices A,B,C are owned by proc0, D,E,F are
4157    owned by proc1, G,H,I are owned by proc2.
4158 
4159    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4160    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4161    The 'M','N' parameters are 8,8, and have the same values on all procs.
4162 
4163    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4164    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4165    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4166    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4167    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4169 
4170    When d_nz, o_nz parameters are specified, d_nz storage elements are
4171    allocated for every row of the local diagonal submatrix, and o_nz
4172    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4175    In this case, the values of d_nz,o_nz are:
4176 .vb
4177      proc0 : dnz = 2, o_nz = 2
4178      proc1 : dnz = 3, o_nz = 2
4179      proc2 : dnz = 1, o_nz = 4
4180 .ve
4181    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4182    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4184    34 values.
4185 
4186    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4188    In the above case the values for d_nnz,o_nnz are:
4189 .vb
4190      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4191      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4192      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4193 .ve
4194    Here the space allocated is sum of all the above values i.e 34, and
4195    hence pre-allocation is perfect.
4196 
4197    Level: intermediate
4198 
4199 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4200           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4201 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate that B is a properly created Mat before dispatching */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the matrix type's implementation; a no-op if the type provides none */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4212 
4213 /*@
4214      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4215          CSR format for the local rows.
4216 
4217    Collective
4218 
4219    Input Parameters:
4220 +  comm - MPI communicator
4221 .  m - number of local rows (Cannot be PETSC_DECIDE)
4222 .  n - This value should be the same as the local size used in creating the
4223        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4224        calculated if N is given) For square matrices n is almost always m.
4225 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4226 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4227 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4228 .   j - column indices
4229 -   a - matrix values
4230 
4231    Output Parameter:
4232 .   mat - the matrix
4233 
4234    Level: intermediate
4235 
4236    Notes:
4237        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4238      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4239      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4240 
4241        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4242 
4243        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
4245     as shown
4246 
4247        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4248 
4249 $        1 0 0
4250 $        2 0 3     P0
4251 $       -------
4252 $        4 5 6     P1
4253 $
4254 $     Process0 [P0]: rows_owned=[0,1]
4255 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4256 $        j =  {0,0,2}  [size = 3]
4257 $        v =  {1,2,3}  [size = 3]
4258 $
4259 $     Process1 [P1]: rows_owned=[2]
4260 $        i =  {0,3}    [size = nrow+1  = 1+1]
4261 $        j =  {0,1,2}  [size = 3]
4262 $        v =  {4,5,6}  [size = 3]
4263 
4264 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4265           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4266 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* The CSR row-offset array must be 0-based, i.e. i[0] == 0 */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  /* The local row count determines which rows this rank contributes; it cannot be deduced here */
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* Copies i/j/a into the matrix's internal storage; the caller retains ownership of the arrays */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4281 
4282 /*@
     MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain data in standard
         CSR format for the local rows. Only the numerical values are updated; the other arrays must be identical.
4285 
4286    Collective
4287 
4288    Input Parameters:
4289 +  mat - the matrix
4290 .  m - number of local rows (Cannot be PETSC_DECIDE)
4291 .  n - This value should be the same as the local size used in creating the
4292        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4293        calculated if N is given) For square matrices n is almost always m.
4294 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4295 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4296 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4297 .  J - column indices
4298 -  v - matrix values
4299 
4300    Level: intermediate
4301 
4302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4303           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4304 @*/
4305 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4306 {
4307   PetscErrorCode ierr;
4308   PetscInt       cstart,nnz,i,j;
4309   PetscInt       *ld;
4310   PetscBool      nooffprocentries;
4311   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4312   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4313   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4314   const PetscInt *Adi = Ad->i;
4315   PetscInt       ldi,Iii,md;
4316 
4317   PetscFunctionBegin;
4318   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4319   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4320   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4321   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4322 
4323   cstart = mat->cmap->rstart;
4324   if (!Aij->ld) {
4325     /* count number of entries below block diagonal */
4326     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4327     Aij->ld = ld;
4328     for (i=0; i<m; i++) {
4329       nnz  = Ii[i+1]- Ii[i];
4330       j     = 0;
4331       while  (J[j] < cstart && j < nnz) {j++;}
4332       J    += nnz;
4333       ld[i] = j;
4334     }
4335   } else {
4336     ld = Aij->ld;
4337   }
4338 
4339   for (i=0; i<m; i++) {
4340     nnz  = Ii[i+1]- Ii[i];
4341     Iii  = Ii[i];
4342     ldi  = ld[i];
4343     md   = Adi[i+1]-Adi[i];
4344     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4345     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4346     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4347     ad  += md;
4348     ao  += nnz - md;
4349   }
4350   nooffprocentries      = mat->nooffprocentries;
4351   mat->nooffprocentries = PETSC_TRUE;
4352   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4353   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4354   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4355   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4356   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4357   mat->nooffprocentries = nooffprocentries;
4358   PetscFunctionReturn(0);
4359 }
4360 
4361 /*@C
4362    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4363    (the default parallel PETSc format).  For good matrix assembly performance
4364    the user should preallocate the matrix storage by setting the parameters
4365    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4366    performance can be increased by more than a factor of 50.
4367 
4368    Collective
4369 
4370    Input Parameters:
4371 +  comm - MPI communicator
4372 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4373            This value should be the same as the local size used in creating the
4374            y vector for the matrix-vector product y = Ax.
4375 .  n - This value should be the same as the local size used in creating the
4376        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4377        calculated if N is given) For square matrices n is almost always m.
4378 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4379 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4380 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4381            (same value is used for all local rows)
4382 .  d_nnz - array containing the number of nonzeros in the various rows of the
4383            DIAGONAL portion of the local submatrix (possibly different for each row)
4384            or NULL, if d_nz is used to specify the nonzero structure.
4385            The size of this array is equal to the number of local rows, i.e 'm'.
4386 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4387            submatrix (same value is used for all local rows).
4388 -  o_nnz - array containing the number of nonzeros in the various rows of the
4389            OFF-DIAGONAL portion of the local submatrix (possibly different for
4390            each row) or NULL, if o_nz is used to specify the nonzero
4391            structure. The size of this array is equal to the number
4392            of local rows, i.e 'm'.
4393 
4394    Output Parameter:
4395 .  A - the matrix
4396 
4397    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4398    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4399    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4400 
4401    Notes:
4402    If the *_nnz parameter is given then the *_nz parameter is ignored
4403 
4404    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4405    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4406    storage requirements for this matrix.
4407 
4408    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4409    processor than it must be used on all processors that share the object for
4410    that argument.
4411 
4412    The user MUST specify either the local or global matrix dimensions
4413    (possibly both).
4414 
4415    The parallel matrix is partitioned across processors such that the
4416    first m0 rows belong to process 0, the next m1 rows belong to
4417    process 1, the next m2 rows belong to process 2 etc.. where
4418    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4419    values corresponding to [m x N] submatrix.
4420 
4421    The columns are logically partitioned with the n0 columns belonging
4422    to 0th partition, the next n1 columns belonging to the next
4423    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4424 
4425    The DIAGONAL portion of the local submatrix on any given processor
4426    is the submatrix corresponding to the rows and columns m,n
4427    corresponding to the given processor. i.e diagonal matrix on
4428    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4429    etc. The remaining portion of the local submatrix [m x (N-n)]
4430    constitute the OFF-DIAGONAL portion. The example below better
4431    illustrates this concept.
4432 
4433    For a square global matrix we define each processor's diagonal portion
4434    to be its local rows and the corresponding columns (a square submatrix);
4435    each processor's off-diagonal portion encompasses the remainder of the
4436    local matrix (a rectangular submatrix).
4437 
4438    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4439 
4440    When calling this routine with a single process communicator, a matrix of
4441    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4442    type of communicator, use the construction mechanism
4443 .vb
4444      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4445 .ve
4446 
4447 $     MatCreate(...,&A);
4448 $     MatSetType(A,MATMPIAIJ);
4449 $     MatSetSizes(A, m,n,M,N);
4450 $     MatMPIAIJSetPreallocation(A,...);
4451 
4452    By default, this format uses inodes (identical nodes) when possible.
4453    We search for consecutive rows with the same nonzero structure, thereby
4454    reusing matrix information to achieve increased efficiency.
4455 
4456    Options Database Keys:
4457 +  -mat_no_inode  - Do not use inodes
4458 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4459 
4460 
4461 
4462    Example usage:
4463 
4464    Consider the following 8x8 matrix with 34 non-zero values, that is
4465    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4466    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4467    as follows
4468 
4469 .vb
4470             1  2  0  |  0  3  0  |  0  4
4471     Proc0   0  5  6  |  7  0  0  |  8  0
4472             9  0 10  | 11  0  0  | 12  0
4473     -------------------------------------
4474            13  0 14  | 15 16 17  |  0  0
4475     Proc1   0 18  0  | 19 20 21  |  0  0
4476             0  0  0  | 22 23  0  | 24  0
4477     -------------------------------------
4478     Proc2  25 26 27  |  0  0 28  | 29  0
4479            30  0  0  | 31 32 33  |  0 34
4480 .ve
4481 
4482    This can be represented as a collection of submatrices as
4483 
4484 .vb
4485       A B C
4486       D E F
4487       G H I
4488 .ve
4489 
4490    Where the submatrices A,B,C are owned by proc0, D,E,F are
4491    owned by proc1, G,H,I are owned by proc2.
4492 
4493    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4494    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4495    The 'M','N' parameters are 8,8, and have the same values on all procs.
4496 
4497    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4498    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4499    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4500    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4501    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4503 
4504    When d_nz, o_nz parameters are specified, d_nz storage elements are
4505    allocated for every row of the local diagonal submatrix, and o_nz
4506    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4508    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4509    In this case, the values of d_nz,o_nz are
4510 .vb
4511      proc0 : dnz = 2, o_nz = 2
4512      proc1 : dnz = 3, o_nz = 2
4513      proc2 : dnz = 1, o_nz = 4
4514 .ve
4515    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4516    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4518    34 values.
4519 
4520    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4522    In the above case the values for d_nnz,o_nnz are
4523 .vb
4524      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4525      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4526      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4527 .ve
4528    Here the space allocated is sum of all the above values i.e 34, and
4529    hence pre-allocation is perfect.
4530 
4531    Level: intermediate
4532 
4533 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4534           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4535 @*/
4536 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4537 {
4538   PetscErrorCode ierr;
4539   PetscMPIInt    size;
4540 
4541   PetscFunctionBegin;
4542   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4543   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4544   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4545   if (size > 1) {
4546     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4547     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4548   } else {
4549     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4550     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4551   }
4552   PetscFunctionReturn(0);
4553 }
4554 
4555 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4556 {
4557   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4558   PetscBool      flg;
4559   PetscErrorCode ierr;
4560 
4561   PetscFunctionBegin;
4562   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4563   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4564   if (Ad)     *Ad     = a->A;
4565   if (Ao)     *Ao     = a->B;
4566   if (colmap) *colmap = a->garray;
4567   PetscFunctionReturn(0);
4568 }
4569 
/* Concatenates the local sequential AIJ matrices inmat from all ranks, in rank order,
   into one parallel AIJ matrix; row rstart..rstart+m of the result come from this rank. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase: size, preallocate, and create *outmat */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* prefix sum of local row counts gives this rank's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per local row for preallocation
       (MatPreallocateInitialize/Finalize is a macro pair declaring hidden state) */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    /* MATAIJ resolves to SEQAIJ or MPIAIJ depending on comm size; only the matching
       preallocation call takes effect, the other is a no-op */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase: copy each local row of inmat into its global position */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4621 
4622 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4623 {
4624   PetscErrorCode    ierr;
4625   PetscMPIInt       rank;
4626   PetscInt          m,N,i,rstart,nnz;
4627   size_t            len;
4628   const PetscInt    *indx;
4629   PetscViewer       out;
4630   char              *name;
4631   Mat               B;
4632   const PetscScalar *values;
4633 
4634   PetscFunctionBegin;
4635   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4636   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4637   /* Should this be the type of the diagonal block of A? */
4638   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4639   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4640   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4641   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4642   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4643   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4644   for (i=0; i<m; i++) {
4645     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4646     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4647     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4648   }
4649   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4650   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4651 
4652   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4653   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4654   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4655   sprintf(name,"%s.%d",outfile,rank);
4656   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4657   ierr = PetscFree(name);CHKERRQ(ierr);
4658   ierr = MatView(B,out);CHKERRQ(ierr);
4659   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4660   ierr = MatDestroy(&B);CHKERRQ(ierr);
4661   PetscFunctionReturn(0);
4662 }
4663 
4664 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4665 {
4666   PetscErrorCode      ierr;
4667   Mat_Merge_SeqsToMPI *merge;
4668   PetscContainer      container;
4669 
4670   PetscFunctionBegin;
4671   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4672   if (container) {
4673     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4674     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4675     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4676     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4677     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4678     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4679     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4680     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4681     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4682     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4683     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4684     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4685     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4686     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4687     ierr = PetscFree(merge);CHKERRQ(ierr);
4688     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4689   }
4690   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4691   PetscFunctionReturn(0);
4692 }
4693 
4694 #include <../src/mat/utils/freespace.h>
4695 #include <petscbt.h>
4696 
/* Numeric phase of merging per-rank sequential AIJ matrices into mpimat: ships the
   value segments owned by other ranks, then accumulates local and received values
   row by row using the symbolic (i,j) structure cached on mpimat by the symbolic phase. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the merge structure produced by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* send the contiguous slice of local values covering the rows owned by [proc] */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);  /* dense work row: N = global column count */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba;
       both index lists are sorted, so advance j through bj_i until columns match */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4813 
4814 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4815 {
4816   PetscErrorCode      ierr;
4817   Mat                 B_mpi;
4818   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4819   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4820   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4821   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4822   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4823   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4824   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4825   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4826   MPI_Status          *status;
4827   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4828   PetscBT             lnkbt;
4829   Mat_Merge_SeqsToMPI *merge;
4830   PetscContainer      container;
4831 
4832   PetscFunctionBegin;
4833   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4834 
4835   /* make sure it is a PETSc comm */
4836   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4837   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4838   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4839 
4840   ierr = PetscNew(&merge);CHKERRQ(ierr);
4841   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4842 
4843   /* determine row ownership */
4844   /*---------------------------------------------------------*/
4845   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4846   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4847   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4848   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4849   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4850   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4851   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4852 
4853   m      = merge->rowmap->n;
4854   owners = merge->rowmap->range;
4855 
4856   /* determine the number of messages to send, their lengths */
4857   /*---------------------------------------------------------*/
4858   len_s = merge->len_s;
4859 
4860   len          = 0; /* length of buf_si[] */
4861   merge->nsend = 0;
4862   for (proc=0; proc<size; proc++) {
4863     len_si[proc] = 0;
4864     if (proc == rank) {
4865       len_s[proc] = 0;
4866     } else {
4867       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4868       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4869     }
4870     if (len_s[proc]) {
4871       merge->nsend++;
4872       nrows = 0;
4873       for (i=owners[proc]; i<owners[proc+1]; i++) {
4874         if (ai[i+1] > ai[i]) nrows++;
4875       }
4876       len_si[proc] = 2*(nrows+1);
4877       len         += len_si[proc];
4878     }
4879   }
4880 
4881   /* determine the number and length of messages to receive for ij-structure */
4882   /*-------------------------------------------------------------------------*/
4883   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4884   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4885 
4886   /* post the Irecv of j-structure */
4887   /*-------------------------------*/
4888   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4889   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4890 
4891   /* post the Isend of j-structure */
4892   /*--------------------------------*/
4893   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4894 
4895   for (proc=0, k=0; proc<size; proc++) {
4896     if (!len_s[proc]) continue;
4897     i    = owners[proc];
4898     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4899     k++;
4900   }
4901 
4902   /* receives and sends of j-structure are complete */
4903   /*------------------------------------------------*/
4904   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4905   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4906 
4907   /* send and recv i-structure */
4908   /*---------------------------*/
4909   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4910   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4911 
4912   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4913   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4914   for (proc=0,k=0; proc<size; proc++) {
4915     if (!len_s[proc]) continue;
4916     /* form outgoing message for i-structure:
4917          buf_si[0]:                 nrows to be sent
4918                [1:nrows]:           row index (global)
4919                [nrows+1:2*nrows+1]: i-structure index
4920     */
4921     /*-------------------------------------------*/
4922     nrows       = len_si[proc]/2 - 1;
4923     buf_si_i    = buf_si + nrows+1;
4924     buf_si[0]   = nrows;
4925     buf_si_i[0] = 0;
4926     nrows       = 0;
4927     for (i=owners[proc]; i<owners[proc+1]; i++) {
4928       anzi = ai[i+1] - ai[i];
4929       if (anzi) {
4930         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4931         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4932         nrows++;
4933       }
4934     }
4935     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4936     k++;
4937     buf_si += len_si[proc];
4938   }
4939 
4940   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4941   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4942 
4943   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4944   for (i=0; i<merge->nrecv; i++) {
4945     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4946   }
4947 
4948   ierr = PetscFree(len_si);CHKERRQ(ierr);
4949   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4950   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4951   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4952   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4953   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4954   ierr = PetscFree(status);CHKERRQ(ierr);
4955 
4956   /* compute a local seq matrix in each processor */
4957   /*----------------------------------------------*/
4958   /* allocate bi array and free space for accumulating nonzero column info */
4959   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4960   bi[0] = 0;
4961 
4962   /* create and initialize a linked list */
4963   nlnk = N+1;
4964   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4965 
4966   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4967   len  = ai[owners[rank+1]] - ai[owners[rank]];
4968   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4969 
4970   current_space = free_space;
4971 
4972   /* determine symbolic info for each local row */
4973   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4974 
4975   for (k=0; k<merge->nrecv; k++) {
4976     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4977     nrows       = *buf_ri_k[k];
4978     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4979     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4980   }
4981 
4982   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4983   len  = 0;
4984   for (i=0; i<m; i++) {
4985     bnzi = 0;
4986     /* add local non-zero cols of this proc's seqmat into lnk */
4987     arow  = owners[rank] + i;
4988     anzi  = ai[arow+1] - ai[arow];
4989     aj    = a->j + ai[arow];
4990     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4991     bnzi += nlnk;
4992     /* add received col data into lnk */
4993     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4994       if (i == *nextrow[k]) { /* i-th row */
4995         anzi  = *(nextai[k]+1) - *nextai[k];
4996         aj    = buf_rj[k] + *nextai[k];
4997         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4998         bnzi += nlnk;
4999         nextrow[k]++; nextai[k]++;
5000       }
5001     }
5002     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5003 
5004     /* if free space is not available, make more free space */
5005     if (current_space->local_remaining<bnzi) {
5006       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5007       nspacedouble++;
5008     }
5009     /* copy data into free space, then initialize lnk */
5010     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5011     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5012 
5013     current_space->array           += bnzi;
5014     current_space->local_used      += bnzi;
5015     current_space->local_remaining -= bnzi;
5016 
5017     bi[i+1] = bi[i] + bnzi;
5018   }
5019 
5020   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5021 
5022   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5023   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5024   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5025 
5026   /* create symbolic parallel matrix B_mpi */
5027   /*---------------------------------------*/
5028   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5029   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5030   if (n==PETSC_DECIDE) {
5031     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5032   } else {
5033     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5034   }
5035   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5036   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5037   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5038   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5039   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5040 
5041   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5042   B_mpi->assembled    = PETSC_FALSE;
5043   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5044   merge->bi           = bi;
5045   merge->bj           = bj;
5046   merge->buf_ri       = buf_ri;
5047   merge->buf_rj       = buf_rj;
5048   merge->coi          = NULL;
5049   merge->coj          = NULL;
5050   merge->owners_co    = NULL;
5051 
5052   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5053 
5054   /* attach the supporting struct to B_mpi for reuse */
5055   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5056   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5057   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5058   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5059   *mpimat = B_mpi;
5060 
5061   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5062   PetscFunctionReturn(0);
5063 }
5064 
5065 /*@C
5066       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5067                  matrices from each processor
5068 
5069     Collective
5070 
5071    Input Parameters:
5072 +    comm - the communicators the parallel matrix will live on
5073 .    seqmat - the input sequential matrices
5074 .    m - number of local rows (or PETSC_DECIDE)
5075 .    n - number of local columns (or PETSC_DECIDE)
5076 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5077 
5078    Output Parameter:
5079 .    mpimat - the parallel matrix generated
5080 
5081     Level: advanced
5082 
5083    Notes:
5084      The dimensions of the sequential matrix in each processor MUST be the same.
5085      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5086      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5087 @*/
5088 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5089 {
5090   PetscErrorCode ierr;
5091   PetscMPIInt    size;
5092 
5093   PetscFunctionBegin;
5094   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5095   if (size == 1) {
5096     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5097     if (scall == MAT_INITIAL_MATRIX) {
5098       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5099     } else {
5100       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5101     }
5102     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5103     PetscFunctionReturn(0);
5104   }
5105   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5106   if (scall == MAT_INITIAL_MATRIX) {
5107     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5108   }
5109   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5110   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5111   PetscFunctionReturn(0);
5112 }
5113 
5114 /*@
5115      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5116           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5117           with MatGetSize()
5118 
5119     Not Collective
5120 
5121    Input Parameters:
5122 +    A - the matrix
5123 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5124 
5125    Output Parameter:
5126 .    A_loc - the local sequential matrix generated
5127 
5128     Level: developer
5129 
5130 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5131 
5132 @*/
5133 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5134 {
5135   PetscErrorCode ierr;
5136   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5137   Mat_SeqAIJ     *mat,*a,*b;
5138   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5139   MatScalar      *aa,*ba,*cam;
5140   PetscScalar    *ca;
5141   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5142   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5143   PetscBool      match;
5144   MPI_Comm       comm;
5145   PetscMPIInt    size;
5146 
5147   PetscFunctionBegin;
5148   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5149   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5150   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5151   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5152   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5153 
5154   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5155   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5156   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5157   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5158   aa = a->a; ba = b->a;
5159   if (scall == MAT_INITIAL_MATRIX) {
5160     if (size == 1) {
5161       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5162       PetscFunctionReturn(0);
5163     }
5164 
5165     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5166     ci[0] = 0;
5167     for (i=0; i<am; i++) {
5168       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5169     }
5170     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5171     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5172     k    = 0;
5173     for (i=0; i<am; i++) {
5174       ncols_o = bi[i+1] - bi[i];
5175       ncols_d = ai[i+1] - ai[i];
5176       /* off-diagonal portion of A */
5177       for (jo=0; jo<ncols_o; jo++) {
5178         col = cmap[*bj];
5179         if (col >= cstart) break;
5180         cj[k]   = col; bj++;
5181         ca[k++] = *ba++;
5182       }
5183       /* diagonal portion of A */
5184       for (j=0; j<ncols_d; j++) {
5185         cj[k]   = cstart + *aj++;
5186         ca[k++] = *aa++;
5187       }
5188       /* off-diagonal portion of A */
5189       for (j=jo; j<ncols_o; j++) {
5190         cj[k]   = cmap[*bj++];
5191         ca[k++] = *ba++;
5192       }
5193     }
5194     /* put together the new matrix */
5195     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5196     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5197     /* Since these are PETSc arrays, change flags to free them as necessary. */
5198     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5199     mat->free_a  = PETSC_TRUE;
5200     mat->free_ij = PETSC_TRUE;
5201     mat->nonew   = 0;
5202   } else if (scall == MAT_REUSE_MATRIX) {
5203     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5204     ci = mat->i; cj = mat->j; cam = mat->a;
5205     for (i=0; i<am; i++) {
5206       /* off-diagonal portion of A */
5207       ncols_o = bi[i+1] - bi[i];
5208       for (jo=0; jo<ncols_o; jo++) {
5209         col = cmap[*bj];
5210         if (col >= cstart) break;
5211         *cam++ = *ba++; bj++;
5212       }
5213       /* diagonal portion of A */
5214       ncols_d = ai[i+1] - ai[i];
5215       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5216       /* off-diagonal portion of A */
5217       for (j=jo; j<ncols_o; j++) {
5218         *cam++ = *ba++; bj++;
5219       }
5220     }
5221   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5222   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5223   PetscFunctionReturn(0);
5224 }
5225 
5226 /*@C
5227      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5228 
5229     Not Collective
5230 
5231    Input Parameters:
5232 +    A - the matrix
5233 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5234 -    row, col - index sets of rows and columns to extract (or NULL)
5235 
5236    Output Parameter:
5237 .    A_loc - the local sequential matrix generated
5238 
5239     Level: developer
5240 
5241 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5242 
5243 @*/
5244 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5245 {
5246   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5247   PetscErrorCode ierr;
5248   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5249   IS             isrowa,iscola;
5250   Mat            *aloc;
5251   PetscBool      match;
5252 
5253   PetscFunctionBegin;
5254   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5255   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5256   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
     /* no rows given: default to all locally owned rows of A */
5257   if (!row) {
5258     start = A->rmap->rstart; end = A->rmap->rend;
5259     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5260   } else {
5261     isrowa = *row;
5262   }
     /* no columns given: build the list of globally nonzero columns of the local part, i.e.
        the diagonal-block columns plus the off-diagonal columns recorded in garray;
        garray is walked in two passes so idx stays sorted around the diagonal range */
5263   if (!col) {
5264     start = A->cmap->rstart;
5265     cmap  = a->garray;
5266     nzA   = a->A->cmap->n;
5267     nzB   = a->B->cmap->n;
5268     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5269     ncols = 0;
5270     for (i=0; i<nzB; i++) {
5271       if (cmap[i] < start) idx[ncols++] = cmap[i]; /* off-diagonal columns below the diagonal block */
5272       else break;
5273     }
5274     imark = i;
5275     for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* the owned (diagonal-block) columns */
5276     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* off-diagonal columns above the diagonal block */
5277     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5278   } else {
5279     iscola = *col;
5280   }
     /* MatCreateSubMatrices() with MAT_REUSE_MATRIX expects an array of matrices as input */
5281   if (scall != MAT_INITIAL_MATRIX) {
5282     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5283     aloc[0] = *A_loc;
5284   }
5285   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5286   if (!col) { /* attach global id of condensed columns */
5287     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5288   }
5289   *A_loc = aloc[0];
5290   ierr   = PetscFree(aloc);CHKERRQ(ierr);
     /* only destroy the index sets created here; caller-provided ones stay untouched */
5291   if (!row) {
5292     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5293   }
5294   if (!col) {
5295     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5296   }
5297   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5298   PetscFunctionReturn(0);
5299 }
5300 
5301 /*
5302  * Destroy a mat that may be composed with PetscSF communication objects.
5303  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5304  * */
5305 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5306 {
5307   PetscSF          sf,osf;
5308   IS               map;
5309   PetscErrorCode   ierr;
5310 
5311   PetscFunctionBegin;
5312   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5313   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5314   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5315   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5316   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5317   ierr = ISDestroy(&map);CHKERRQ(ierr);
5318   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5319   PetscFunctionReturn(0);
5320 }
5321 
5322 /*
5323  * Create a sequential AIJ matrix based on row indices: the whole row (all of its columns)
5324  * is extracted once a row index is matched. Rows may be local or remote. The routine is
5325  * designed to be scalable in memory so that nothing is sized by a global dimension.
5326  * */
5327 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5328 {
5329   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5330   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5331   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol;
5332   PetscSFNode              *iremote,*oiremote;
5333   const PetscInt           *lrowindices;
5334   PetscErrorCode           ierr;
5335   PetscSF                  sf,osf;
5336   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5337   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5338   MPI_Comm                 comm;
5339   ISLocalToGlobalMapping   mapping;
5340 
5341   PetscFunctionBegin;
5342   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5343   /* plocalsize is the number of roots
5344    * nrows is the number of leaves
5345    * */
5346   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5347   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5348   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5349   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5350   for (i=0;i<nrows;i++) {
5351     /* Find a remote index and an owner for a row
5352      * The row could be local or remote
5353      * */
5354     owner = 0;
5355     lidx  = 0;
5356     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5357     iremote[i].index = lidx;
5358     iremote[i].rank  = owner;
5359   }
5360   /* Create SF to communicate how many nonzero columns for each row */
5361   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5362   /* SF will figure out the number of nonzero columns for each row, and their
5363    * offsets
5364    * */
5365   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5366   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5367   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5368 
     /* Per-root data is a pair (diag count/offset, off-diag count/offset), hence the 2* sizing
        and the MPIU_2INT unit in the broadcasts below */
5369   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5370   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5371   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5372   roffsets[0] = 0;
5373   roffsets[1] = 0;
5374   for (i=0;i<plocalsize;i++) {
5375     /* diag */
5376     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5377     /* off diag */
5378     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5379     /* compute offsets so that we know the relative location for each row */
5380     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5381     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5382   }
5383   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5384   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5385   /* 'r' means root, and 'l' means leaf */
5386   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5387   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5388   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5389   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5390   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5391   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5392   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5393   dntotalcols = 0;
5394   ontotalcols = 0;
5395   ncol = 0;
5396   for (i=0;i<nrows;i++) {
5397     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5398     ncol = PetscMax(pnnz[i],ncol);
5399     /* diag */
5400     dntotalcols += nlcols[i*2+0];
5401     /* off diag */
5402     ontotalcols += nlcols[i*2+1];
5403   }
5404   /* We do not need to figure the right number of columns
5405    * since all the calculations will be done by going through the raw data
5406    * */
5407   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5408   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5409   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5410   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
     /* Build the per-nonzero graphs: the diag and off-diag leaves interleave into the single
        CSR value/column arrays of P_oth via disjoint ilocal/oilocal positions */
5411   /* diag */
5412   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5413   /* off diag */
5414   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5415   /* diag */
5416   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5417   /* off diag */
5418   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5419   dntotalcols = 0;
5420   ontotalcols = 0;
5421   ntotalcols  = 0;
5422   for (i=0;i<nrows;i++) {
5423     owner = 0;
5424     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5425     /* Set iremote for diag matrix */
5426     for (j=0;j<nlcols[i*2+0];j++) {
5427       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5428       iremote[dntotalcols].rank    = owner;
5429       /* P_oth is seqAIJ so that ilocal needs to point into its single contiguous arrays */
5430       ilocal[dntotalcols++]        = ntotalcols++;
5431     }
5432     /* off diag */
5433     for (j=0;j<nlcols[i*2+1];j++) {
5434       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5435       oiremote[ontotalcols].rank    = owner;
5436       oilocal[ontotalcols++]        = ntotalcols++;
5437     }
5438   }
5439   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5440   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5441   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5442   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5443   /* P serves as roots and P_oth is leaves
5444    * Diag matrix
5445    * */
5446   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5447   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5448   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5449 
5450   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5451   /* Off diag */
5452   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5453   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5454   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5455   /* We operate on the matrix internal data to save memory */
5456   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5457   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5458   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5459   /* Convert to global indices for diag matrix (temporarily modifies pd->j in place; undone below) */
5460   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5461   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5462   /* We want P_oth to store global indices */
5463   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5464   /* Use a memory scalable approach */
5465   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5466   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5467   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5468   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5469   /* Convert back to local indices */
5470   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5471   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5472   nout = 0;
     /* Restore po->j to local indices; every global index must map back, else the mapping is inconsistent */
5473   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5474   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5475   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5476   /* Exchange values */
5477   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5478   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5479   /* Stop PETSc from shrinking memory */
5480   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5481   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5482   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5483   /* Attach PetscSF objects to P_oth so that we can reuse them later */
5484   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5485   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5486   /* ``New MatDestroy" takes care of PetscSF objects as well */
5487   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5488   PetscFunctionReturn(0);
5489 }
5490 
5491 /*
5492  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5493  * This supports MPIAIJ and MAIJ
5494  * */
5495 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5496 {
5497   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5498   Mat_SeqAIJ            *p_oth;
5499   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5500   IS                    rows,map;
5501   PetscHMapI            hamp;
5502   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5503   MPI_Comm              comm;
5504   PetscSF               sf,osf;
5505   PetscBool             has;
5506   PetscErrorCode        ierr;
5507 
5508   PetscFunctionBegin;
5509   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5510   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5511   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5512    *  and then create a submatrix (that often is an overlapping matrix)
5513    * */
5514   if (reuse==MAT_INITIAL_MATRIX) {
5515     /* Use a hash table to figure out unique keys */
5516     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5517     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5518     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5519     count = 0;
5520     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5521     for (i=0;i<a->B->cmap->n;i++) {
5522       key  = a->garray[i]/dof;
5523       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5524       if (!has) {
5525         mapping[i] = count;
5526         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5527       } else {
5528         /* Current 'i' has the same value the previous step */
5529         mapping[i] = count-1;
5530       }
5531     }
5532     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5533     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5534     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5535     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5536     off = 0;
5537     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5538     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5539     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5540     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5541     /* In case, the matrix was already created but users want to recreate the matrix */
5542     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5543     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5544     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5545     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5546   } else if (reuse==MAT_REUSE_MATRIX) {
5547     /* If matrix was already created, we simply update values using SF objects
5548      * that as attached to the matrix ealier.
5549      *  */
5550     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5551     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5552     if (!sf || !osf) {
5553       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5554     }
5555     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5556     /* Update values in place */
5557     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5558     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5559     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5560     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5561   } else {
5562     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5563   }
5564   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5565   PetscFunctionReturn(0);
5566 }
5567 
5568 /*@C
5569     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5570 
5571     Collective on Mat
5572 
5573    Input Parameters:
5574 +    A,B - the matrices in mpiaij format
5575 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5576 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5577 
5578    Output Parameter:
5579 +    rowb, colb - index sets of rows and columns of B to extract
5580 -    B_seq - the sequential matrix generated
5581 
5582     Level: developer
5583 
5584 @*/
5585 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5586 {
5587   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5588   PetscErrorCode ierr;
5589   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5590   IS             isrowb,iscolb;
5591   Mat            *bseq=NULL;
5592 
5593   PetscFunctionBegin;
     /* A's column layout must match B's row layout so that columns of A index rows of B */
5594   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5595     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5596   }
5597   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5598 
5599   if (scall == MAT_INITIAL_MATRIX) {
     /* Build the sorted list of nonzero columns of local A: off-diagonal columns (from garray)
        below the diagonal block, then the owned columns, then the remaining off-diagonal ones */
5600     start = A->cmap->rstart;
5601     cmap  = a->garray;
5602     nzA   = a->A->cmap->n;
5603     nzB   = a->B->cmap->n;
5604     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5605     ncols = 0;
5606     for (i=0; i<nzB; i++) {  /* row < local row index */
5607       if (cmap[i] < start) idx[ncols++] = cmap[i];
5608       else break;
5609     }
5610     imark = i;
5611     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5612     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5613     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5614     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5615   } else {
     /* Reuse requires the index sets produced by the initial call, and MatCreateSubMatrices()
        expects the matrix wrapped in a one-element array */
5616     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5617     isrowb  = *rowb; iscolb = *colb;
5618     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5619     bseq[0] = *B_seq;
5620   }
5621   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5622   *B_seq = bseq[0];
5623   ierr   = PetscFree(bseq);CHKERRQ(ierr);
     /* either hand the index sets back to the caller for reuse, or destroy the ones created here */
5624   if (!rowb) {
5625     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5626   } else {
5627     *rowb = isrowb;
5628   }
5629   if (!colb) {
5630     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5631   } else {
5632     *colb = iscolb;
5633   }
5634   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5635   PetscFunctionReturn(0);
5636 }
5637 
5638 /*
5639     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5640     of the OFF-DIAGONAL portion of local A
5641 
5642     Collective on Mat
5643 
5644    Input Parameters:
5645 +    A,B - the matrices in mpiaij format
5646 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5647 
5648    Output Parameter:
5649 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5650 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5651 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5652 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5653 
5654     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5656 
5657     Level: developer
5658 
5659 */
5660 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5661 {
5662   PetscErrorCode         ierr;
5663   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5664   Mat_SeqAIJ             *b_oth;
5665   VecScatter             ctx;
5666   MPI_Comm               comm;
5667   const PetscMPIInt      *rprocs,*sprocs;
5668   const PetscInt         *srow,*rstarts,*sstarts;
5669   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5670   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5671   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5672   MPI_Request            *rwaits = NULL,*swaits = NULL;
5673   MPI_Status             rstatus;
5674   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5675 
5676   PetscFunctionBegin;
5677   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5678   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5679 
5680   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5681     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5682   }
5683   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5684   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5685 
5686   if (size == 1) {
5687     startsj_s = NULL;
5688     bufa_ptr  = NULL;
5689     *B_oth    = NULL;
5690     PetscFunctionReturn(0);
5691   }
5692 
5693   ctx = a->Mvctx;
5694   tag = ((PetscObject)ctx)->tag;
5695 
5696   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5697   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5698   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5699   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5700   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5701   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5702   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5703 
5704   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5705   if (scall == MAT_INITIAL_MATRIX) {
5706     /* i-array */
5707     /*---------*/
5708     /*  post receives */
5709     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5710     for (i=0; i<nrecvs; i++) {
5711       rowlen = rvalues + rstarts[i]*rbs;
5712       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5713       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5714     }
5715 
5716     /* pack the outgoing message */
5717     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5718 
5719     sstartsj[0] = 0;
5720     rstartsj[0] = 0;
5721     len         = 0; /* total length of j or a array to be sent */
5722     if (nsends) {
5723       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5724       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5725     }
5726     for (i=0; i<nsends; i++) {
5727       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5728       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5729       for (j=0; j<nrows; j++) {
5730         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5731         for (l=0; l<sbs; l++) {
5732           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5733 
5734           rowlen[j*sbs+l] = ncols;
5735 
5736           len += ncols;
5737           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5738         }
5739         k++;
5740       }
5741       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5742 
5743       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5744     }
5745     /* recvs and sends of i-array are completed */
5746     i = nrecvs;
5747     while (i--) {
5748       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5749     }
5750     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5751     ierr = PetscFree(svalues);CHKERRQ(ierr);
5752 
5753     /* allocate buffers for sending j and a arrays */
5754     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5755     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5756 
5757     /* create i-array of B_oth */
5758     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5759 
5760     b_othi[0] = 0;
5761     len       = 0; /* total length of j or a array to be received */
5762     k         = 0;
5763     for (i=0; i<nrecvs; i++) {
5764       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5765       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5766       for (j=0; j<nrows; j++) {
5767         b_othi[k+1] = b_othi[k] + rowlen[j];
5768         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5769         k++;
5770       }
5771       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5772     }
5773     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5774 
5775     /* allocate space for j and a arrrays of B_oth */
5776     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5777     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5778 
5779     /* j-array */
5780     /*---------*/
5781     /*  post receives of j-array */
5782     for (i=0; i<nrecvs; i++) {
5783       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5784       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5785     }
5786 
5787     /* pack the outgoing message j-array */
5788     if (nsends) k = sstarts[0];
5789     for (i=0; i<nsends; i++) {
5790       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5791       bufJ  = bufj+sstartsj[i];
5792       for (j=0; j<nrows; j++) {
5793         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5794         for (ll=0; ll<sbs; ll++) {
5795           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5796           for (l=0; l<ncols; l++) {
5797             *bufJ++ = cols[l];
5798           }
5799           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5800         }
5801       }
5802       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5803     }
5804 
5805     /* recvs and sends of j-array are completed */
5806     i = nrecvs;
5807     while (i--) {
5808       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5809     }
5810     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5811   } else if (scall == MAT_REUSE_MATRIX) {
5812     sstartsj = *startsj_s;
5813     rstartsj = *startsj_r;
5814     bufa     = *bufa_ptr;
5815     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5816     b_otha   = b_oth->a;
5817   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5818 
5819   /* a-array */
5820   /*---------*/
5821   /*  post receives of a-array */
5822   for (i=0; i<nrecvs; i++) {
5823     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5824     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5825   }
5826 
5827   /* pack the outgoing message a-array */
5828   if (nsends) k = sstarts[0];
5829   for (i=0; i<nsends; i++) {
5830     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5831     bufA  = bufa+sstartsj[i];
5832     for (j=0; j<nrows; j++) {
5833       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5834       for (ll=0; ll<sbs; ll++) {
5835         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5836         for (l=0; l<ncols; l++) {
5837           *bufA++ = vals[l];
5838         }
5839         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5840       }
5841     }
5842     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5843   }
5844   /* recvs and sends of a-array are completed */
5845   i = nrecvs;
5846   while (i--) {
5847     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5848   }
5849   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5850   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5851 
5852   if (scall == MAT_INITIAL_MATRIX) {
5853     /* put together the new matrix */
5854     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5855 
5856     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5857     /* Since these are PETSc arrays, change flags to free them as necessary. */
5858     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5859     b_oth->free_a  = PETSC_TRUE;
5860     b_oth->free_ij = PETSC_TRUE;
5861     b_oth->nonew   = 0;
5862 
5863     ierr = PetscFree(bufj);CHKERRQ(ierr);
5864     if (!startsj_s || !bufa_ptr) {
5865       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5866       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5867     } else {
5868       *startsj_s = sstartsj;
5869       *startsj_r = rstartsj;
5870       *bufa_ptr  = bufa;
5871     }
5872   }
5873 
5874   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5875   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5876   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5877   PetscFunctionReturn(0);
5878 }
5879 
5880 /*@C
5881   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5882 
5883   Not Collective
5884 
5885   Input Parameters:
5886 . A - The matrix in mpiaij format
5887 
  Output Parameters:
5889 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5890 . colmap - A map from global column index to local index into lvec
5891 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5892 
5893   Level: developer
5894 
5895 @*/
5896 #if defined(PETSC_USE_CTABLE)
5897 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5898 #else
5899 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5900 #endif
5901 {
5902   Mat_MPIAIJ *a;
5903 
5904   PetscFunctionBegin;
5905   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5906   PetscValidPointer(lvec, 2);
5907   PetscValidPointer(colmap, 3);
5908   PetscValidPointer(multScatter, 4);
5909   a = (Mat_MPIAIJ*) A->data;
5910   if (lvec) *lvec = a->lvec;
5911   if (colmap) *colmap = a->colmap;
5912   if (multScatter) *multScatter = a->Mvctx;
5913   PetscFunctionReturn(0);
5914 }
5915 
5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5917 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5919 #if defined(PETSC_HAVE_MKL_SPARSE)
5920 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5921 #endif
5922 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5923 #if defined(PETSC_HAVE_ELEMENTAL)
5924 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5925 #endif
5926 #if defined(PETSC_HAVE_HYPRE)
5927 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5928 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5929 #endif
5930 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5932 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5933 
5934 /*
5935     Computes (B'*A')' since computing B*A directly is untenable
5936 
5937                n                       p                          p
5938         (              )       (              )         (                  )
5939       m (      A       )  *  n (       B      )   =   m (         C        )
5940         (              )       (              )         (                  )
5941 
5942 */
5943 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5944 {
5945   PetscErrorCode ierr;
5946   Mat            At,Bt,Ct;
5947 
5948   PetscFunctionBegin;
5949   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5950   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5951   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5952   ierr = MatDestroy(&At);CHKERRQ(ierr);
5953   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5954   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5955   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5956   PetscFunctionReturn(0);
5957 }
5958 
5959 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5960 {
5961   PetscErrorCode ierr;
5962   PetscInt       m=A->rmap->n,n=B->cmap->n;
5963   Mat            Cmat;
5964 
5965   PetscFunctionBegin;
5966   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5967   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5968   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5969   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5970   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5971   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5972   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5973   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5974 
5975   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5976 
5977   *C = Cmat;
5978   PetscFunctionReturn(0);
5979 }
5980 
5981 /* ----------------------------------------------------------------*/
5982 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5983 {
5984   PetscErrorCode ierr;
5985 
5986   PetscFunctionBegin;
5987   if (scall == MAT_INITIAL_MATRIX) {
5988     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5989     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5990     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5991   }
5992   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5993   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5994   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5995   PetscFunctionReturn(0);
5996 }
5997 
5998 /*MC
5999    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6000 
6001    Options Database Keys:
6002 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6003 
6004    Level: beginner
6005 
6006    Notes:
6007     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6008     in this case the values associated with the rows and columns one passes in are set to zero
6009     in the matrix
6010 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6013 
6014 .seealso: MatCreateAIJ()
6015 M*/
6016 
/*
   MatCreate_MPIAIJ - constructor for the MATMPIAIJ type: allocates the Mat_MPIAIJ
   data structure, installs the MatOps function table, creates the stash used to
   buffer off-process entries, and registers (composes) the type-specific routines
   on the object. Called by MatSetType(); preallocation happens later via the
   composed MatMPIAIJSetPreallocation_C routine.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  /* attach the implementation-specific data and the virtual function table */
  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  /* colmap/garray are created lazily when the off-diagonal part is first assembled */
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* register type-specific operations and conversions under their query names;
     the string keys are part of the public dispatch interface and must not change */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6088 
6089 /*@C
6090      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6091          and "off-diagonal" part of the matrix in CSR format.
6092 
6093    Collective
6094 
6095    Input Parameters:
6096 +  comm - MPI communicator
6097 .  m - number of local rows (Cannot be PETSC_DECIDE)
6098 .  n - This value should be the same as the local size used in creating the
6099        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6100        calculated if N is given) For square matrices n is almost always m.
6101 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6102 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6103 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6104 .   j - column indices
6105 .   a - matrix values
6106 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6107 .   oj - column indices
6108 -   oa - matrix values
6109 
6110    Output Parameter:
6111 .   mat - the matrix
6112 
6113    Level: advanced
6114 
6115    Notes:
6116        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6117        must free the arrays once the matrix has been destroyed and not before.
6118 
6119        The i and j indices are 0 based
6120 
6121        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6122 
6123        This sets local rows and cannot be used to set off-processor values.
6124 
6125        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6126        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6127        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6128        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6129        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6130        communication if it is known that only local entries will be set.
6131 
6132 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6133           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6134 @*/
6135 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6136 {
6137   PetscErrorCode ierr;
6138   Mat_MPIAIJ     *maij;
6139 
6140   PetscFunctionBegin;
6141   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6142   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6143   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6144   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6145   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6146   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6147   maij = (Mat_MPIAIJ*) (*mat)->data;
6148 
6149   (*mat)->preallocated = PETSC_TRUE;
6150 
6151   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6152   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6153 
6154   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6155   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6156 
6157   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6158   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6159   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6160   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6161 
6162   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6163   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6164   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6165   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6166   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6167   PetscFunctionReturn(0);
6168 }
6169 
6170 /*
6171     Special version for direct calls from Fortran
6172 */
6173 #include <petsc/private/fortranimpl.h>
6174 
6175 /* Change these macros so can be used in void function */
6176 #undef CHKERRQ
6177 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6178 #undef SETERRQ2
6179 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6180 #undef SETERRQ3
6181 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6182 #undef SETERRQ
6183 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6184 
6185 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6186 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6187 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6188 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6189 #else
6190 #endif
/*
   matsetvaluesmpiaij_ - Fortran-callable fast path of MatSetValues() for MATMPIAIJ.

   Inserts/adds the m x n logically dense block v into the rows im[] and columns in[]
   (global, 0-based; negative indices are skipped). Locally owned entries go directly
   into the diagonal (aij->A) or off-diagonal (aij->B) SeqAIJ blocks via the
   MatSetValues_SeqAIJ_{A,B}_Private macros; off-process rows are stashed for
   communication at assembly time. Errors abort via the CHKERRQ/SETERRQ macro
   redefinitions above this function, since a void Fortran stub cannot return a code.
*/
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  /* first call fixes the insert mode; mixing ADD_VALUES and INSERT_VALUES is illegal */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                    = aij->A;
    Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa                  = a->a;
    PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                    = aij->B;
    Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba                  = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* per-row search state consumed/updated by the insertion macros */
    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row index: skip by convention */
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up binary-search state for this row in both
           the diagonal (1) and off-diagonal (2) blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          /* optionally drop explicit zeros, but never a zero on the diagonal */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            /* column in the off-diagonal block: map the global column to B's local index */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* new off-diagonal column after assembly: disassemble so B uses
                   global column indices again, then retry with the global index */
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ*)B->data;
                bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: buffer the values for exchange during MatAssemblyBegin/End */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
6313