xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d5c9c0c4eebc2f2a01a1bd0c86fca87e2acd2a03)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
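   Example:
   A minimal sketch of creating an AIJ matrix and calling both preallocation routines; the sizes
   m, n, M, N and the counts nz, dnz, onz are placeholders, not values taken from this file.
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,nz,NULL);            /* takes effect on a single-process communicator */
      MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);  /* takes effect on a multi-process communicator  */
.ve
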
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
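   Example:
   A minimal sketch of selecting this type at runtime through the options database; the sizes
   m, n, M, N are placeholders, and the program would be run with -mat_type aijcrl.
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetFromOptions(A);
.ve
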
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
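
    A minimal calling sketch (gseq and m are placeholder names; gseq is a square MATSEQAIJ matrix
    which only rank 0 reads in this routine, and m is the number of rows this rank should own):

       Mat dist;
       ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
       ...
       ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);  /* later, copy over new numerical values */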
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* count the off-diagonal entries in each row (olens) and those left of the diagonal block (ld) */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* count the off-diagonal entries in each row (olens) and those left of the diagonal block (ld) */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each processor
425 has an order-N integer array) but is fast to access.
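
  As a small illustration (not data from this file): if the off-diagonal part uses global columns
  garray = {3,7,12}, the map sends global column 3 to local column 0, 7 to 1, and 12 to 2; the
  entries are stored shifted up by one (as 1, 2, 3) so that a lookup result of 0, or a missing
  key, means the global column does not appear in the off-diagonal part.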
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
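
    As a small illustration (not data from this file): with two local rows, cstart = 0 and cend = 2,
    the input mat_i = {0,2,3}, mat_j = {0,5,1} is split so that row 0 contributes diagonal column 0
    and off-diagonal column 5, and row 1 contributes diagonal column 1, giving ailen = {1,1} and
    bilen = {1,0}.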
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n; ) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled; if so, we must
857      also disassemble ourselves, in order that we may reassemble. */
858   /*
859      if nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled thus we can skip this stuff
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = 0;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layout don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113 
1114   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1116   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1122 {
1123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135   VecScatter     Mvctx = a->Mvctx;
1136 
1137   PetscFunctionBegin;
1138   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1139   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1141   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1149   PetscErrorCode ierr;
1150 
1151   PetscFunctionBegin;
1152   /* do nondiagonal part */
1153   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1154   /* do local part */
1155   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1156   /* add partial results together */
1157   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1163 {
1164   MPI_Comm       comm;
1165   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1166   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1167   IS             Me,Notme;
1168   PetscErrorCode ierr;
1169   PetscInt       M,N,first,last,*notme,i;
1170   PetscBool      lf;
1171   PetscMPIInt    size;
1172 
1173   PetscFunctionBegin;
1174   /* Easy test: symmetric diagonal block */
1175   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1176   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1177   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1178   if (!*f) PetscFunctionReturn(0);
1179   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1181   if (size == 1) PetscFunctionReturn(0);
1182 
1183   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1184   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1185   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1186   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1187   for (i=0; i<first; i++) notme[i] = i;
1188   for (i=last; i<M; i++) notme[i-last+first] = i;
1189   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1190   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1191   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1192   Aoff = Aoffs[0];
1193   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1194   Boff = Boffs[0];
1195   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1197   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1199   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1200   ierr = PetscFree(notme);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1205 {
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1214 {
1215   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1216   PetscErrorCode ierr;
1217 
1218   PetscFunctionBegin;
1219   /* do nondiagonal part */
1220   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1221   /* do local part */
1222   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1223   /* add partial results together */
1224   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 /*
1230   This only works correctly for square matrices where the subblock A->A is the
1231    diagonal block
1232 */
1233 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1234 {
1235   PetscErrorCode ierr;
1236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1237 
1238   PetscFunctionBegin;
1239   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1240   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1241   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1246 {
1247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1252   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1257 {
1258   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262 #if defined(PETSC_USE_LOG)
1263   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1264 #endif
1265   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1266   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1268   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1269 #if defined(PETSC_USE_CTABLE)
1270   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1271 #else
1272   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1273 #endif
1274   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1275   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1276   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1277   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1278   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1279   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1280   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1281 
1282   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1292 #if defined(PETSC_HAVE_ELEMENTAL)
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1294 #endif
1295 #if defined(PETSC_HAVE_HYPRE)
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1298 #endif
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1306 {
1307   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1308   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1309   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1310   const PetscInt    *garray = aij->garray;
1311   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1312   PetscInt          *rowlens;
1313   PetscInt          *colidxs;
1314   PetscScalar       *matvals;
1315   PetscErrorCode    ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1319 
1320   M  = mat->rmap->N;
1321   N  = mat->cmap->N;
1322   m  = mat->rmap->n;
1323   rs = mat->rmap->rstart;
1324   cs = mat->cmap->rstart;
1325   nz = A->nz + B->nz;
1326 
1327   /* write matrix header */
1328   header[0] = MAT_FILE_CLASSID;
1329   header[1] = M; header[2] = N; header[3] = nz;
1330   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1331   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1332 
1333   /* fill in and store row lengths  */
1334   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1335   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1336   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1337   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1338 
1339   /* fill in and store column indices */
1340   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1341   for (cnt=0, i=0; i<m; i++) {
1342     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1343       if (garray[B->j[jb]] > cs) break;
1344       colidxs[cnt++] = garray[B->j[jb]];
1345     }
1346     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1347       colidxs[cnt++] = A->j[ja] + cs;
1348     for (; jb<B->i[i+1]; jb++)
1349       colidxs[cnt++] = garray[B->j[jb]];
1350   }
1351   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1352   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1353   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1354 
1355   /* fill in and store nonzero values */
1356   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1357   for (cnt=0, i=0; i<m; i++) {
1358     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1359       if (garray[B->j[jb]] > cs) break;
1360       matvals[cnt++] = B->a[jb];
1361     }
1362     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1363       matvals[cnt++] = A->a[ja];
1364     for (; jb<B->i[i+1]; jb++)
1365       matvals[cnt++] = B->a[jb];
1366   }
1367   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1368   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1369   ierr = PetscFree(matvals);CHKERRQ(ierr);
1370 
1371   /* write block size option to the viewer's .info file */
1372   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
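
/*
   Editorial note (a summary of the writes performed above, not an authoritative
   description of the PETSc binary format): the routine emits, in order,

     PetscInt    header[4] = {MAT_FILE_CLASSID, M, N, global number of nonzeros}
     PetscInt    rowlens[M]      row lengths in global row order
     PetscInt    colidxs[nnz]    global column indices, sorted within each row
     PetscScalar matvals[nnz]    values in the same order as colidxs

   The per-row interleaving (off-diagonal entries with global column < cs, then the
   diagonal block shifted by cs, then the remaining off-diagonal entries) is what keeps
   each row's column indices sorted.
*/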
1375 
1376 #include <petscdraw.h>
1377 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1378 {
1379   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1380   PetscErrorCode    ierr;
1381   PetscMPIInt       rank = aij->rank,size = aij->size;
1382   PetscBool         isdraw,iascii,isbinary;
1383   PetscViewer       sviewer;
1384   PetscViewerFormat format;
1385 
1386   PetscFunctionBegin;
1387   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1388   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1390   if (iascii) {
1391     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1392     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1393       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1394       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1395       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1396       for (i=0; i<(PetscInt)size; i++) {
1397         nmax = PetscMax(nmax,nz[i]);
1398         nmin = PetscMin(nmin,nz[i]);
1399         navg += nz[i];
1400       }
1401       ierr = PetscFree(nz);CHKERRQ(ierr);
1402       navg = navg/size;
1403       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1404       PetscFunctionReturn(0);
1405     }
1406     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1407     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1408       MatInfo   info;
1409       PetscInt  *inodes = NULL;
1410 
1411       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1412       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1413       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1414       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1415       if (!inodes) {
1416         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1417                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1418       } else {
1419         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1420                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1421       }
1422       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1423       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1424       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1425       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1426       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1427       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1429       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1430       PetscFunctionReturn(0);
1431     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1432       PetscInt inodecount,inodelimit,*inodes;
1433       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1434       if (inodes) {
1435         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1436       } else {
1437         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1438       }
1439       PetscFunctionReturn(0);
1440     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1441       PetscFunctionReturn(0);
1442     }
1443   } else if (isbinary) {
1444     if (size == 1) {
1445       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1446       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1447     } else {
1448       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1449     }
1450     PetscFunctionReturn(0);
1451   } else if (iascii && size == 1) {
1452     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1453     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1454     PetscFunctionReturn(0);
1455   } else if (isdraw) {
1456     PetscDraw draw;
1457     PetscBool isnull;
1458     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1459     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1460     if (isnull) PetscFunctionReturn(0);
1461   }
1462 
1463   { /* assemble the entire matrix onto first processor */
1464     Mat A = NULL, Av;
1465     IS  isrow,iscol;
1466 
1467     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1468     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1469     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1470     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1471 /*  The commented code uses MatCreateSubMatrices instead */
1472 /*
1473     Mat *AA, A = NULL, Av;
1474     IS  isrow,iscol;
1475 
1476     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1477     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1478     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1479     if (!rank) {
1480        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1481        A    = AA[0];
1482        Av   = AA[0];
1483     }
1484     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1485 */
1486     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1487     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1488     /*
1489        Everyone has to call to draw the matrix since the graphics waits are
1490        synchronized across all processors that share the PetscDraw object
1491     */
1492     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1493     if (!rank) {
1494       if (((PetscObject)mat)->name) {
1495         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1496       }
1497       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1498     }
1499     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1500     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1501     ierr = MatDestroy(&A);CHKERRQ(ierr);
1502   }
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1507 {
1508   PetscErrorCode ierr;
1509   PetscBool      iascii,isdraw,issocket,isbinary;
1510 
1511   PetscFunctionBegin;
1512   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1513   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1514   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1515   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1516   if (iascii || isdraw || isbinary || issocket) {
1517     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1518   }
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1523 {
1524   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1525   PetscErrorCode ierr;
1526   Vec            bb1 = 0;
1527   PetscBool      hasop;
1528 
1529   PetscFunctionBegin;
1530   if (flag == SOR_APPLY_UPPER) {
1531     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1532     PetscFunctionReturn(0);
1533   }
1534 
1535   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1536     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1537   }
1538 
1539   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1540     if (flag & SOR_ZERO_INITIAL_GUESS) {
1541       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1542       its--;
1543     }
1544 
1545     while (its--) {
1546       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1547       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548 
1549       /* update rhs: bb1 = bb - B*x */
1550       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1551       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1552 
1553       /* local sweep */
1554       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1555     }
1556   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1557     if (flag & SOR_ZERO_INITIAL_GUESS) {
1558       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1559       its--;
1560     }
1561     while (its--) {
1562       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1564 
1565       /* update rhs: bb1 = bb - B*x */
1566       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1567       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1568 
1569       /* local sweep */
1570       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1571     }
1572   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1573     if (flag & SOR_ZERO_INITIAL_GUESS) {
1574       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1575       its--;
1576     }
1577     while (its--) {
1578       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580 
1581       /* update rhs: bb1 = bb - B*x */
1582       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1583       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1584 
1585       /* local sweep */
1586       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1587     }
1588   } else if (flag & SOR_EISENSTAT) {
1589     Vec xx1;
1590 
1591     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1592     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1593 
1594     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1595     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1596     if (!mat->diag) {
1597       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1598       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1599     }
1600     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1601     if (hasop) {
1602       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1603     } else {
1604       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1605     }
1606     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1607 
1608     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1609 
1610     /* local sweep */
1611     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1612     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1613     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1614   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1615 
1616   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1617 
1618   matin->factorerrortype = mat->A->factorerrortype;
1619   PetscFunctionReturn(0);
1620 }
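
/*
   Editorial sketch (not additional functionality) of the local relaxation implemented in
   MatSOR_MPIAIJ() above, with A_d = mat->A and A_o = mat->B:

     for each of the 'its' outer iterations:
       x_ghost = scatter(x)                  gather off-process values of x
       bb1     = bb - A_o * x_ghost          move the off-process coupling to the right-hand side
       x       = SOR(A_d, bb1, omega, lits)  'lits' sweeps on the on-process block only

   Only the diagonal block is relaxed; coupling between processes enters solely through the
   updated right-hand side, which is why flags requesting a true parallel sweep fall through
   to the "Parallel SOR not supported" error.
*/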
1621 
1622 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1623 {
1624   Mat            aA,aB,Aperm;
1625   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1626   PetscScalar    *aa,*ba;
1627   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1628   PetscSF        rowsf,sf;
1629   IS             parcolp = NULL;
1630   PetscBool      done;
1631   PetscErrorCode ierr;
1632 
1633   PetscFunctionBegin;
1634   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1635   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1636   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1637   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1638 
1639   /* Invert row permutation to find out where my rows should go */
1640   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1641   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1642   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1643   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1644   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1645   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1646 
1647   /* Invert column permutation to find out where my columns should go */
1648   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1649   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1650   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1651   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1652   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1653   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1654   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1655 
1656   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1657   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1658   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1659 
1660   /* Find out where my gcols should go */
1661   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1662   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1663   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1664   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1665   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1666   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1667   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1669 
1670   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1671   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1672   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1673   for (i=0; i<m; i++) {
1674     PetscInt    row = rdest[i];
1675     PetscMPIInt rowner;
1676     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1677     for (j=ai[i]; j<ai[i+1]; j++) {
1678       PetscInt    col = cdest[aj[j]];
1679       PetscMPIInt cowner;
1680       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1681       if (rowner == cowner) dnnz[i]++;
1682       else onnz[i]++;
1683     }
1684     for (j=bi[i]; j<bi[i+1]; j++) {
1685       PetscInt    col = gcdest[bj[j]];
1686       PetscMPIInt cowner;
1687       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1688       if (rowner == cowner) dnnz[i]++;
1689       else onnz[i]++;
1690     }
1691   }
1692   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1693   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1694   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1695   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1696   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1697 
1698   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1699   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1700   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1701   for (i=0; i<m; i++) {
1702     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1703     PetscInt j0,rowlen;
1704     rowlen = ai[i+1] - ai[i];
1705     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1706       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1707       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1708     }
1709     rowlen = bi[i+1] - bi[i];
1710     for (j0=j=0; j<rowlen; j0=j) {
1711       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714   }
1715   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1716   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1717   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1718   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1719   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1720   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1721   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1722   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1723   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1724   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1725   *B = Aperm;
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1730 {
1731   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1736   if (ghosts) *ghosts = aij->garray;
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1741 {
1742   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1743   Mat            A    = mat->A,B = mat->B;
1744   PetscErrorCode ierr;
1745   PetscLogDouble isend[5],irecv[5];
1746 
1747   PetscFunctionBegin;
1748   info->block_size = 1.0;
1749   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1750 
1751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1752   isend[3] = info->memory;  isend[4] = info->mallocs;
1753 
1754   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1757   isend[3] += info->memory;  isend[4] += info->mallocs;
1758   if (flag == MAT_LOCAL) {
1759     info->nz_used      = isend[0];
1760     info->nz_allocated = isend[1];
1761     info->nz_unneeded  = isend[2];
1762     info->memory       = isend[3];
1763     info->mallocs      = isend[4];
1764   } else if (flag == MAT_GLOBAL_MAX) {
1765     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1766 
1767     info->nz_used      = irecv[0];
1768     info->nz_allocated = irecv[1];
1769     info->nz_unneeded  = irecv[2];
1770     info->memory       = irecv[3];
1771     info->mallocs      = irecv[4];
1772   } else if (flag == MAT_GLOBAL_SUM) {
1773     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1774 
1775     info->nz_used      = irecv[0];
1776     info->nz_allocated = irecv[1];
1777     info->nz_unneeded  = irecv[2];
1778     info->memory       = irecv[3];
1779     info->mallocs      = irecv[4];
1780   }
1781   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1782   info->fill_ratio_needed = 0;
1783   info->factor_mallocs    = 0;
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1788 {
1789   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1790   PetscErrorCode ierr;
1791 
1792   PetscFunctionBegin;
1793   switch (op) {
1794   case MAT_NEW_NONZERO_LOCATIONS:
1795   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1796   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1797   case MAT_KEEP_NONZERO_PATTERN:
1798   case MAT_NEW_NONZERO_LOCATION_ERR:
1799   case MAT_USE_INODES:
1800   case MAT_IGNORE_ZERO_ENTRIES:
1801     MatCheckPreallocated(A,1);
1802     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1803     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1804     break;
1805   case MAT_ROW_ORIENTED:
1806     MatCheckPreallocated(A,1);
1807     a->roworiented = flg;
1808 
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_NEW_DIAGONALS:
1813   case MAT_SORTED_FULL:
1814     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1815     break;
1816   case MAT_IGNORE_OFF_PROC_ENTRIES:
1817     a->donotstash = flg;
1818     break;
1819   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1820   case MAT_SPD:
1821   case MAT_SYMMETRIC:
1822   case MAT_STRUCTURALLY_SYMMETRIC:
1823   case MAT_HERMITIAN:
1824   case MAT_SYMMETRY_ETERNAL:
1825     break;
1826   case MAT_SUBMAT_SINGLEIS:
1827     A->submat_singleis = flg;
1828     break;
1829   case MAT_STRUCTURE_ONLY:
1830     /* The option is handled directly by MatSetOption() */
1831     break;
1832   default:
1833     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1834   }
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1839 {
1840   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1841   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1842   PetscErrorCode ierr;
1843   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1844   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1845   PetscInt       *cmap,*idx_p;
1846 
1847   PetscFunctionBegin;
1848   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1849   mat->getrowactive = PETSC_TRUE;
1850 
1851   if (!mat->rowvalues && (idx || v)) {
1852     /*
1853         allocate enough space to hold information from the longest row.
1854     */
1855     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1856     PetscInt   max = 1,tmp;
1857     for (i=0; i<matin->rmap->n; i++) {
1858       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1859       if (max < tmp) max = tmp;
1860     }
1861     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1862   }
1863 
1864   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1865   lrow = row - rstart;
1866 
1867   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1868   if (!v)   {pvA = 0; pvB = 0;}
1869   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1870   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1871   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1872   nztot = nzA + nzB;
1873 
1874   cmap = mat->garray;
1875   if (v  || idx) {
1876     if (nztot) {
1877       /* Sort by increasing column numbers, assuming A and B already sorted */
1878       PetscInt imark = -1;
1879       if (v) {
1880         *v = v_p = mat->rowvalues;
1881         for (i=0; i<nzB; i++) {
1882           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1883           else break;
1884         }
1885         imark = i;
1886         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1887         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1888       }
1889       if (idx) {
1890         *idx = idx_p = mat->rowindices;
1891         if (imark > -1) {
1892           for (i=0; i<imark; i++) {
1893             idx_p[i] = cmap[cworkB[i]];
1894           }
1895         } else {
1896           for (i=0; i<nzB; i++) {
1897             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1898             else break;
1899           }
1900           imark = i;
1901         }
1902         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1903         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1904       }
1905     } else {
1906       if (idx) *idx = 0;
1907       if (v)   *v   = 0;
1908     }
1909   }
1910   *nz  = nztot;
1911   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1912   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
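
/*
   Editorial note on the merge above: the compressed off-diagonal part B only stores columns
   that lie entirely to the left (global column < cstart) or entirely to the right of the
   diagonal column block, and garray is sorted, so copying "B columns below cstart, then all
   of A shifted by cstart, then the remaining B columns" returns the requested row with its
   global column indices in increasing order.
*/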
1915 
1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1917 {
1918   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1919 
1920   PetscFunctionBegin;
1921   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1922   aij->getrowactive = PETSC_FALSE;
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1927 {
1928   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1929   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1930   PetscErrorCode ierr;
1931   PetscInt       i,j,cstart = mat->cmap->rstart;
1932   PetscReal      sum = 0.0;
1933   MatScalar      *v;
1934 
1935   PetscFunctionBegin;
1936   if (aij->size == 1) {
1937     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1938   } else {
1939     if (type == NORM_FROBENIUS) {
1940       v = amat->a;
1941       for (i=0; i<amat->nz; i++) {
1942         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1943       }
1944       v = bmat->a;
1945       for (i=0; i<bmat->nz; i++) {
1946         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1947       }
1948       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1949       *norm = PetscSqrtReal(*norm);
1950       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1951     } else if (type == NORM_1) { /* max column norm */
1952       PetscReal *tmp,*tmp2;
1953       PetscInt  *jj,*garray = aij->garray;
1954       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1955       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1956       *norm = 0.0;
1957       v     = amat->a; jj = amat->j;
1958       for (j=0; j<amat->nz; j++) {
1959         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1960       }
1961       v = bmat->a; jj = bmat->j;
1962       for (j=0; j<bmat->nz; j++) {
1963         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1964       }
1965       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1966       for (j=0; j<mat->cmap->N; j++) {
1967         if (tmp2[j] > *norm) *norm = tmp2[j];
1968       }
1969       ierr = PetscFree(tmp);CHKERRQ(ierr);
1970       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1971       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1972     } else if (type == NORM_INFINITY) { /* max row norm */
1973       PetscReal ntemp = 0.0;
1974       for (j=0; j<aij->A->rmap->n; j++) {
1975         v   = amat->a + amat->i[j];
1976         sum = 0.0;
1977         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1978           sum += PetscAbsScalar(*v); v++;
1979         }
1980         v = bmat->a + bmat->i[j];
1981         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1982           sum += PetscAbsScalar(*v); v++;
1983         }
1984         if (sum > ntemp) ntemp = sum;
1985       }
1986       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1987       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1988     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1989   }
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1994 {
1995   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1996   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1997   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1998   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1999   PetscErrorCode  ierr;
2000   Mat             B,A_diag,*B_diag;
2001   const MatScalar *array;
2002 
2003   PetscFunctionBegin;
2004   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2005   ai = Aloc->i; aj = Aloc->j;
2006   bi = Bloc->i; bj = Bloc->j;
2007   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2008     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2009     PetscSFNode          *oloc;
2010     PETSC_UNUSED PetscSF sf;
2011 
2012     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2013     /* compute d_nnz for preallocation */
2014     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2015     for (i=0; i<ai[ma]; i++) {
2016       d_nnz[aj[i]]++;
2017     }
2018     /* compute local off-diagonal contributions */
2019     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2020     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2021     /* map those to global */
2022     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2023     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2024     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2025     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2026     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2027     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2028     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2029 
2030     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2031     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2032     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2033     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2034     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2035     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2036   } else {
2037     B    = *matout;
2038     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2039   }
2040 
2041   b           = (Mat_MPIAIJ*)B->data;
2042   A_diag      = a->A;
2043   B_diag      = &b->A;
2044   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2045   A_diag_ncol = A_diag->cmap->N;
2046   B_diag_ilen = sub_B_diag->ilen;
2047   B_diag_i    = sub_B_diag->i;
2048 
2049   /* Set ilen for diagonal of B */
2050   for (i=0; i<A_diag_ncol; i++) {
2051     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2052   }
2053 
2054   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2055      very quickly (without using MatSetValues()), because all writes are local. */
2056   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2057 
2058   /* copy over the B part */
2059   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2060   array = Bloc->a;
2061   row   = A->rmap->rstart;
2062   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2063   cols_tmp = cols;
2064   for (i=0; i<mb; i++) {
2065     ncol = bi[i+1]-bi[i];
2066     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2067     row++;
2068     array += ncol; cols_tmp += ncol;
2069   }
2070   ierr = PetscFree(cols);CHKERRQ(ierr);
2071 
2072   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2073   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2074   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2075     *matout = B;
2076   } else {
2077     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2078   }
2079   PetscFunctionReturn(0);
2080 }
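
/*
   A minimal usage sketch of the reuse modes handled above (editorial; variable names are
   illustrative only).  MAT_INITIAL_MATRIX allocates and fills A^T; after the numerical
   values of A change (same nonzero pattern), MAT_REUSE_MATRIX refills the existing
   transpose; MAT_INPLACE_MATRIX (passing &A itself) replaces A by A^T via MatHeaderMerge().

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/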
2081 
2082 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2083 {
2084   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2085   Mat            a    = aij->A,b = aij->B;
2086   PetscErrorCode ierr;
2087   PetscInt       s1,s2,s3;
2088 
2089   PetscFunctionBegin;
2090   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2091   if (rr) {
2092     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2093     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2094     /* Overlap communication with computation. */
2095     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2096   }
2097   if (ll) {
2098     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2099     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2100     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2101   }
2102   /* scale  the diagonal block */
2103   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2104 
2105   if (rr) {
2106     /* Do a scatter end and then right scale the off-diagonal block */
2107     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2108     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2109   }
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2114 {
2115   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2124 {
2125   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2126   Mat            a,b,c,d;
2127   PetscBool      flg;
2128   PetscErrorCode ierr;
2129 
2130   PetscFunctionBegin;
2131   a = matA->A; b = matA->B;
2132   c = matB->A; d = matB->B;
2133 
2134   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2135   if (flg) {
2136     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2137   }
2138   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2143 {
2144   PetscErrorCode ierr;
2145   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2146   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2147 
2148   PetscFunctionBegin;
2149   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2150   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2151     /* Because of the column compression in the off-process part a->B, the number of
2152        columns in a->B and b->B may differ, so we cannot call MatCopy() directly on the
2153        two parts. If needed, a more efficient copy than MatCopy_Basic() could be provided
2154        by first uncompressing the a->B matrices and then copying the corresponding
2155        submatrices. */
2156     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2157   } else {
2158     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2159     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2160   }
2161   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2166 {
2167   PetscErrorCode ierr;
2168 
2169   PetscFunctionBegin;
2170   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 /*
2175    Computes the number of nonzeros per row needed for preallocation when X and Y
2176    have different nonzero structure.
2177 */
2178 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2179 {
2180   PetscInt       i,j,k,nzx,nzy;
2181 
2182   PetscFunctionBegin;
2183   /* Set the number of nonzeros in the new matrix */
2184   for (i=0; i<m; i++) {
2185     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2186     nzx = xi[i+1] - xi[i];
2187     nzy = yi[i+1] - yi[i];
2188     nnz[i] = 0;
2189     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2190       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2191       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2192       nnz[i]++;
2193     }
2194     for (; k<nzy; k++) nnz[i]++;
2195   }
2196   PetscFunctionReturn(0);
2197 }
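
/*
   Editorial worked example of the merge count above: for one row whose global column indices
   are X = {0, 3, 7} and Y = {3, 5}, the merged pattern is {0, 3, 5, 7}, so nnz for that row
   is 4.  The inner loop counts Y columns smaller than the current X column, the equality test
   skips the duplicate column 3, each X column is counted once, and the trailing loop counts
   any Y columns larger than every X column.
*/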
2198 
2199 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2200 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2201 {
2202   PetscErrorCode ierr;
2203   PetscInt       m = Y->rmap->N;
2204   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2205   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2213 {
2214   PetscErrorCode ierr;
2215   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2216   PetscBLASInt   bnz,one=1;
2217   Mat_SeqAIJ     *x,*y;
2218 
2219   PetscFunctionBegin;
2220   if (str == SAME_NONZERO_PATTERN) {
2221     PetscScalar alpha = a;
2222     x    = (Mat_SeqAIJ*)xx->A->data;
2223     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2224     y    = (Mat_SeqAIJ*)yy->A->data;
2225     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2226     x    = (Mat_SeqAIJ*)xx->B->data;
2227     y    = (Mat_SeqAIJ*)yy->B->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2230     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2231     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/MatAssemblyEnd(), so the matrix on the GPU
2232        will be updated */
2233 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2234     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2235       Y->offloadmask = PETSC_OFFLOAD_CPU;
2236     }
2237 #endif
2238   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2239     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2240   } else {
2241     Mat      B;
2242     PetscInt *nnz_d,*nnz_o;
2243     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2244     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2245     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2246     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2247     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2248     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2249     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2250     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2251     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2252     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2253     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2254     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2255     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2256     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2257   }
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2262 
2263 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2264 {
2265 #if defined(PETSC_USE_COMPLEX)
2266   PetscErrorCode ierr;
2267   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2268 
2269   PetscFunctionBegin;
2270   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2271   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2272 #else
2273   PetscFunctionBegin;
2274 #endif
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2279 {
2280   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2281   PetscErrorCode ierr;
2282 
2283   PetscFunctionBegin;
2284   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2285   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2290 {
2291   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2292   PetscErrorCode ierr;
2293 
2294   PetscFunctionBegin;
2295   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2296   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2301 {
2302   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2303   PetscErrorCode ierr;
2304   PetscInt       i,*idxb = 0;
2305   PetscScalar    *va,*vb;
2306   Vec            vtmp;
2307 
2308   PetscFunctionBegin;
2309   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2310   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2311   if (idx) {
2312     for (i=0; i<A->rmap->n; i++) {
2313       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2314     }
2315   }
2316 
2317   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2318   if (idx) {
2319     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2320   }
2321   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2322   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2323 
2324   for (i=0; i<A->rmap->n; i++) {
2325     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2326       va[i] = vb[i];
2327       if (idx) idx[i] = a->garray[idxb[i]];
2328     }
2329   }
2330 
2331   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2332   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2333   ierr = PetscFree(idxb);CHKERRQ(ierr);
2334   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2339 {
2340   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2341   PetscErrorCode ierr;
2342   PetscInt       i,*idxb = 0;
2343   PetscScalar    *va,*vb;
2344   Vec            vtmp;
2345 
2346   PetscFunctionBegin;
2347   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2348   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2349   if (idx) {
2350     for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, as in MatGetRowMaxAbs_MPIAIJ() above */
2351       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2352     }
2353   }
2354 
2355   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2356   if (idx) {
2357     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2358   }
2359   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2360   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2361 
2362   for (i=0; i<A->rmap->n; i++) {
2363     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2364       va[i] = vb[i];
2365       if (idx) idx[i] = a->garray[idxb[i]];
2366     }
2367   }
2368 
2369   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2370   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2371   ierr = PetscFree(idxb);CHKERRQ(ierr);
2372   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2377 {
2378   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2379   PetscInt       n      = A->rmap->n;
2380   PetscInt       cstart = A->cmap->rstart;
2381   PetscInt       *cmap  = mat->garray;
2382   PetscInt       *diagIdx, *offdiagIdx;
2383   Vec            diagV, offdiagV;
2384   PetscScalar    *a, *diagA, *offdiagA;
2385   PetscInt       r;
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2390   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2391   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2392   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2393   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2394   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2395   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2396   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2397   for (r = 0; r < n; ++r) {
2398     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2399       a[r]   = diagA[r];
2400       idx[r] = cstart + diagIdx[r];
2401     } else {
2402       a[r]   = offdiagA[r];
2403       idx[r] = cmap[offdiagIdx[r]];
2404     }
2405   }
2406   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2407   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2408   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2409   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2410   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2411   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2416 {
2417   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2418   PetscInt       n      = A->rmap->n;
2419   PetscInt       cstart = A->cmap->rstart;
2420   PetscInt       *cmap  = mat->garray;
2421   PetscInt       *diagIdx, *offdiagIdx;
2422   Vec            diagV, offdiagV;
2423   PetscScalar    *a, *diagA, *offdiagA;
2424   PetscInt       r;
2425   PetscErrorCode ierr;
2426 
2427   PetscFunctionBegin;
2428   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2429   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2430   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2431   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2432   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2433   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2434   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2435   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2436   for (r = 0; r < n; ++r) {
2437     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2438       a[r]   = diagA[r];
2439       idx[r] = cstart + diagIdx[r];
2440     } else {
2441       a[r]   = offdiagA[r];
2442       idx[r] = cmap[offdiagIdx[r]];
2443     }
2444   }
2445   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2446   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2447   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2448   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2449   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2450   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2455 {
2456   PetscErrorCode ierr;
2457   Mat            *dummy;
2458 
2459   PetscFunctionBegin;
2460   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2461   *newmat = *dummy;
2462   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2467 {
2468   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2469   PetscErrorCode ierr;
2470 
2471   PetscFunctionBegin;
2472   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2473   A->factorerrortype = a->A->factorerrortype;
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2478 {
2479   PetscErrorCode ierr;
2480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2481 
2482   PetscFunctionBegin;
2483   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2484   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2485   if (x->assembled) {
2486     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2487   } else {
2488     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2489   }
2490   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2491   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2496 {
2497   PetscFunctionBegin;
2498   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2499   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 /*@
2504    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2505 
2506    Collective on Mat
2507 
2508    Input Parameters:
2509 +    A - the matrix
2510 -    sc - PETSC_TRUE to use the scalable algorithm (the default is PETSC_FALSE, i.e. the non-scalable algorithm)
2511 
2512  Level: advanced
2513 
2514 @*/
2515 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2516 {
2517   PetscErrorCode       ierr;
2518 
2519   PetscFunctionBegin;
2520   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2521   PetscFunctionReturn(0);
2522 }
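
/*
   A minimal usage sketch (editorial; the index set 'is' is illustrative only): enable the
   scalable algorithm on an assembled MATMPIAIJ matrix before growing an index set by one
   level of overlap, for example when building overlapping subdomains:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,1,&is,1);CHKERRQ(ierr);

   The same choice can be made from the options database with -mat_increase_overlap_scalable,
   handled by MatSetFromOptions_MPIAIJ() below.
*/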
2523 
2524 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2525 {
2526   PetscErrorCode       ierr;
2527   PetscBool            sc = PETSC_FALSE,flg;
2528 
2529   PetscFunctionBegin;
2530   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2531   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2532   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2533   if (flg) {
2534     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2535   }
2536   ierr = PetscOptionsTail();CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2541 {
2542   PetscErrorCode ierr;
2543   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2544   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2545 
2546   PetscFunctionBegin;
2547   if (!Y->preallocated) {
2548     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2549   } else if (!aij->nz) {
2550     PetscInt nonew = aij->nonew;
2551     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2552     aij->nonew = nonew;
2553   }
2554   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2559 {
2560   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2561   PetscErrorCode ierr;
2562 
2563   PetscFunctionBegin;
2564   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2565   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2566   if (d) {
2567     PetscInt rstart;
2568     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2569     *d += rstart;
2570 
2571   }
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2576 {
2577   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2578   PetscErrorCode ierr;
2579 
2580   PetscFunctionBegin;
2581   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 /* -------------------------------------------------------------------*/
2586 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2587                                        MatGetRow_MPIAIJ,
2588                                        MatRestoreRow_MPIAIJ,
2589                                        MatMult_MPIAIJ,
2590                                 /* 4*/ MatMultAdd_MPIAIJ,
2591                                        MatMultTranspose_MPIAIJ,
2592                                        MatMultTransposeAdd_MPIAIJ,
2593                                        0,
2594                                        0,
2595                                        0,
2596                                 /*10*/ 0,
2597                                        0,
2598                                        0,
2599                                        MatSOR_MPIAIJ,
2600                                        MatTranspose_MPIAIJ,
2601                                 /*15*/ MatGetInfo_MPIAIJ,
2602                                        MatEqual_MPIAIJ,
2603                                        MatGetDiagonal_MPIAIJ,
2604                                        MatDiagonalScale_MPIAIJ,
2605                                        MatNorm_MPIAIJ,
2606                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2607                                        MatAssemblyEnd_MPIAIJ,
2608                                        MatSetOption_MPIAIJ,
2609                                        MatZeroEntries_MPIAIJ,
2610                                 /*24*/ MatZeroRows_MPIAIJ,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                 /*29*/ MatSetUp_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        MatGetDiagonalBlock_MPIAIJ,
2619                                        0,
2620                                 /*34*/ MatDuplicate_MPIAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*39*/ MatAXPY_MPIAIJ,
2626                                        MatCreateSubMatrices_MPIAIJ,
2627                                        MatIncreaseOverlap_MPIAIJ,
2628                                        MatGetValues_MPIAIJ,
2629                                        MatCopy_MPIAIJ,
2630                                 /*44*/ MatGetRowMax_MPIAIJ,
2631                                        MatScale_MPIAIJ,
2632                                        MatShift_MPIAIJ,
2633                                        MatDiagonalSet_MPIAIJ,
2634                                        MatZeroRowsColumns_MPIAIJ,
2635                                 /*49*/ MatSetRandom_MPIAIJ,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2641                                        0,
2642                                        MatSetUnfactored_MPIAIJ,
2643                                        MatPermute_MPIAIJ,
2644                                        0,
2645                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2646                                        MatDestroy_MPIAIJ,
2647                                        MatView_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                 /*64*/ 0,
2651                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2656                                        MatGetRowMinAbs_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                 /*75*/ MatFDColoringApply_AIJ,
2662                                        MatSetFromOptions_MPIAIJ,
2663                                        0,
2664                                        0,
2665                                        MatFindZeroDiagonals_MPIAIJ,
2666                                 /*80*/ 0,
2667                                        0,
2668                                        0,
2669                                 /*83*/ MatLoad_MPIAIJ,
2670                                        MatIsSymmetric_MPIAIJ,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                 /*89*/ 0,
2676                                        0,
2677                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2678                                        0,
2679                                        0,
2680                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        MatBindToCPU_MPIAIJ,
2685                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2686                                        0,
2687                                        0,
2688                                        MatConjugate_MPIAIJ,
2689                                        0,
2690                                 /*104*/MatSetValuesRow_MPIAIJ,
2691                                        MatRealPart_MPIAIJ,
2692                                        MatImaginaryPart_MPIAIJ,
2693                                        0,
2694                                        0,
2695                                 /*109*/0,
2696                                        0,
2697                                        MatGetRowMin_MPIAIJ,
2698                                        0,
2699                                        MatMissingDiagonal_MPIAIJ,
2700                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2701                                        0,
2702                                        MatGetGhosts_MPIAIJ,
2703                                        0,
2704                                        0,
2705                                 /*119*/0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        MatGetMultiProcBlock_MPIAIJ,
2710                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2711                                        MatGetColumnNorms_MPIAIJ,
2712                                        MatInvertBlockDiagonal_MPIAIJ,
2713                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2714                                        MatCreateSubMatricesMPI_MPIAIJ,
2715                                 /*129*/0,
2716                                        0,
2717                                        0,
2718                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2719                                        0,
2720                                 /*134*/0,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                 /*139*/MatSetBlockSizes_MPIAIJ,
2726                                        0,
2727                                        0,
2728                                        MatFDColoringSetUp_MPIXAIJ,
2729                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2730                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2731                                 /*145*/0,
2732                                        0,
2733                                        0
2734 };
2735 
2736 /* ----------------------------------------------------------------------------------------*/
2737 
2738 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2739 {
2740   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2745   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2750 {
2751   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2752   PetscErrorCode ierr;
2753 
2754   PetscFunctionBegin;
2755   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2756   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2761 {
2762   Mat_MPIAIJ     *b;
2763   PetscErrorCode ierr;
2764   PetscMPIInt    size;
2765 
2766   PetscFunctionBegin;
2767   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2768   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2769   b = (Mat_MPIAIJ*)B->data;
2770 
2771 #if defined(PETSC_USE_CTABLE)
2772   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2773 #else
2774   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2775 #endif
2776   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2777   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2778   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2779 
2780   /* Because B may have been resized we simply destroy it and create a new one each time */
2781   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2782   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2783   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2784   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2785   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2786   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2787   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2788 
2789   if (!B->preallocated) {
2790     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2791     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2792     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2793     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2794     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2795   }
2796 
2797   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2798   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2799   B->preallocated  = PETSC_TRUE;
2800   B->was_assembled = PETSC_FALSE;
2801   B->assembled     = PETSC_FALSE;
2802   PetscFunctionReturn(0);
2803 }
2804 
2805 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2806 {
2807   Mat_MPIAIJ     *b;
2808   PetscErrorCode ierr;
2809 
2810   PetscFunctionBegin;
2811   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2812   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2813   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2814   b = (Mat_MPIAIJ*)B->data;
2815 
2816 #if defined(PETSC_USE_CTABLE)
2817   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2818 #else
2819   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2820 #endif
2821   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2822   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2823   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2824 
2825   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2826   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2827   B->preallocated  = PETSC_TRUE;
2828   B->was_assembled = PETSC_FALSE;
2829   B->assembled = PETSC_FALSE;
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2834 {
2835   Mat            mat;
2836   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2837   PetscErrorCode ierr;
2838 
2839   PetscFunctionBegin;
2840   *newmat = 0;
2841   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2842   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2843   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2844   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2845   a       = (Mat_MPIAIJ*)mat->data;
2846 
2847   mat->factortype   = matin->factortype;
2848   mat->assembled    = matin->assembled;
2849   mat->insertmode   = NOT_SET_VALUES;
2850   mat->preallocated = matin->preallocated;
2851 
2852   a->size         = oldmat->size;
2853   a->rank         = oldmat->rank;
2854   a->donotstash   = oldmat->donotstash;
2855   a->roworiented  = oldmat->roworiented;
2856   a->rowindices   = NULL;
2857   a->rowvalues    = NULL;
2858   a->getrowactive = PETSC_FALSE;
2859 
2860   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2861   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2862 
2863   if (oldmat->colmap) {
2864 #if defined(PETSC_USE_CTABLE)
2865     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2866 #else
2867     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2868     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2869     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2870 #endif
2871   } else a->colmap = NULL;
2872   if (oldmat->garray) {
2873     PetscInt len;
2874     len  = oldmat->B->cmap->n;
2875     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2876     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2877     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2878   } else a->garray = NULL;
2879 
2880   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
2881      in fact, MatDuplicate() only requires the matrix to be preallocated.
2882      This may happen inside a DMCreateMatrix_Shell */
2883   if (oldmat->lvec) {
2884     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2885     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2886   }
2887   if (oldmat->Mvctx) {
2888     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2889     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2890   }
2891   if (oldmat->Mvctx_mpi1) {
2892     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2893     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2894   }
2895 
2896   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2897   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2898   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2899   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2900   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2901   *newmat = mat;
2902   PetscFunctionReturn(0);
2903 }
2904 
2905 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2906 {
2907   PetscBool      isbinary, ishdf5;
2908   PetscErrorCode ierr;
2909 
2910   PetscFunctionBegin;
2911   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2912   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2913   /* force binary viewer to load .info file if it has not yet done so */
2914   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2915   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2916   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2917   if (isbinary) {
2918     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2919   } else if (ishdf5) {
2920 #if defined(PETSC_HAVE_HDF5)
2921     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2922 #else
2923     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2924 #endif
2925   } else {
2926     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2927   }
2928   PetscFunctionReturn(0);
2929 }
2930 
2931 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2932 {
2933   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2934   PetscInt       *rowidxs,*colidxs;
2935   PetscScalar    *matvals;
2936   PetscErrorCode ierr;
2937 
2938   PetscFunctionBegin;
2939   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2940 
2941   /* read in matrix header */
2942   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2943   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2944   M  = header[1]; N = header[2]; nz = header[3];
2945   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2946   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2947   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2948 
2949   /* set block sizes from the viewer's .info file */
2950   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2951   /* set global sizes if not set already */
2952   if (mat->rmap->N < 0) mat->rmap->N = M;
2953   if (mat->cmap->N < 0) mat->cmap->N = N;
2954   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2955   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2956 
2957   /* check if the matrix sizes are correct */
2958   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2959   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2960 
2961   /* read in row lengths and build row indices */
2962   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2963   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2964   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
2965   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2966   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2967   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
2968   /* read in column indices and matrix values */
2969   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2970   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2971   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2972   /* store matrix indices and values */
2973   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2974   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2975   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2976   PetscFunctionReturn(0);
2977 }
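
/*
   Illustrative loading sketch (not part of the original source): a call sequence a user might
   follow so that MatLoad() reaches MatLoad_MPIAIJ_Binary() above; the file name matrix.dat and
   the variable names A and viewer are assumptions of this example.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/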
2978 
2979 /* Not scalable because of ISAllGather() unless getting all columns. */
2980 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2981 {
2982   PetscErrorCode ierr;
2983   IS             iscol_local;
2984   PetscBool      isstride;
2985   PetscMPIInt    lisstride=0,gisstride;
2986 
2987   PetscFunctionBegin;
2988   /* check if we are grabbing all columns*/
2989   /* check if we are grabbing all columns */
2990 
2991   if (isstride) {
2992     PetscInt  start,len,mstart,mlen;
2993     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2994     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2995     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2996     if (mstart == start && mlen-mstart == len) lisstride = 1;
2997   }
2998 
2999   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3000   if (gisstride) {
3001     PetscInt N;
3002     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3003     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3004     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3005     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3006   } else {
3007     PetscInt cbs;
3008     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3009     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3010     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3011   }
3012 
3013   *isseq = iscol_local;
3014   PetscFunctionReturn(0);
3015 }
3016 
3017 /*
3018  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and the global size of iscol_local
3019  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3020 
3021  Input Parameters:
3022    mat - matrix
3023    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3024            i.e., mat->rstart <= isrow[i] < mat->rend
3025    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3026            i.e., mat->cstart <= iscol[i] < mat->cend
3027  Output Parameters:
3028    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3029    iscol_o - sequential column index set for retrieving mat->B
3030    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3031  */
3032 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3033 {
3034   PetscErrorCode ierr;
3035   Vec            x,cmap;
3036   const PetscInt *is_idx;
3037   PetscScalar    *xarray,*cmaparray;
3038   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3039   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3040   Mat            B=a->B;
3041   Vec            lvec=a->lvec,lcmap;
3042   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3043   MPI_Comm       comm;
3044   VecScatter     Mvctx=a->Mvctx;
3045 
3046   PetscFunctionBegin;
3047   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3048   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3049 
3050   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3051   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3052   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3053   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3054   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3055 
3056   /* Get start indices */
3057   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3058   isstart -= ncols;
3059   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3060 
3061   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3062   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3063   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3064   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3065   for (i=0; i<ncols; i++) {
3066     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3067     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3068     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3069   }
3070   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3071   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3072   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3073 
3074   /* Get iscol_d */
3075   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3076   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3077   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3078 
3079   /* Get isrow_d */
3080   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3081   rstart = mat->rmap->rstart;
3082   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3083   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3084   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3085   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3086 
3087   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3088   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3089   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3090 
3091   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3092   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3093   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3094 
3095   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3096 
3097   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3098   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3099 
3100   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3101   /* off-process column indices */
3102   count = 0;
3103   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3104   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3105 
3106   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3107   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3108   for (i=0; i<Bn; i++) {
3109     if (PetscRealPart(xarray[i]) > -1.0) {
3110       idx[count]     = i;                   /* local column index in off-diagonal part B */
3111       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3112       count++;
3113     }
3114   }
3115   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3116   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3117 
3118   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3119   /* cannot ensure iscol_o has same blocksize as iscol! */
3120 
3121   ierr = PetscFree(idx);CHKERRQ(ierr);
3122   *garray = cmap1;
3123 
3124   ierr = VecDestroy(&x);CHKERRQ(ierr);
3125   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3126   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3127   PetscFunctionReturn(0);
3128 }
3129 
3130 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3131 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3132 {
3133   PetscErrorCode ierr;
3134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3135   Mat            M = NULL;
3136   MPI_Comm       comm;
3137   IS             iscol_d,isrow_d,iscol_o;
3138   Mat            Asub = NULL,Bsub = NULL;
3139   PetscInt       n;
3140 
3141   PetscFunctionBegin;
3142   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3143 
3144   if (call == MAT_REUSE_MATRIX) {
3145     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3146     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3147     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3148 
3149     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3150     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3151 
3152     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3153     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3154 
3155     /* Update diagonal and off-diagonal portions of submat */
3156     asub = (Mat_MPIAIJ*)(*submat)->data;
3157     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3158     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3159     if (n) {
3160       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3161     }
3162     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3163     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3164 
3165   } else { /* call == MAT_INITIAL_MATRIX */
3166     const PetscInt *garray;
3167     PetscInt        BsubN;
3168 
3169     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3170     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3171 
3172     /* Create local submatrices Asub and Bsub */
3173     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3174     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3175 
3176     /* Create submatrix M */
3177     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3178 
3179     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3180     asub = (Mat_MPIAIJ*)M->data;
3181 
3182     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3183     n = asub->B->cmap->N;
3184     if (BsubN > n) {
3185       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3186       const PetscInt *idx;
3187       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3188       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3189 
3190       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3191       j = 0;
3192       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3193       for (i=0; i<n; i++) {
3194         if (j >= BsubN) break;
3195         while (subgarray[i] > garray[j]) j++;
3196 
3197         if (subgarray[i] == garray[j]) {
3198           idx_new[i] = idx[j++];
3199         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3200       }
3201       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3202 
3203       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3204       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3205 
3206     } else if (BsubN < n) {
3207       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3208     }
3209 
3210     ierr = PetscFree(garray);CHKERRQ(ierr);
3211     *submat = M;
3212 
3213     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3214     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3215     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3216 
3217     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3218     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3219 
3220     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3221     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3222   }
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3227 {
3228   PetscErrorCode ierr;
3229   IS             iscol_local=NULL,isrow_d;
3230   PetscInt       csize;
3231   PetscInt       n,i,j,start,end;
3232   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3233   MPI_Comm       comm;
3234 
3235   PetscFunctionBegin;
3236   /* If isrow has same processor distribution as mat,
3237      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3238   if (call == MAT_REUSE_MATRIX) {
3239     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3240     if (isrow_d) {
3241       sameRowDist  = PETSC_TRUE;
3242       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3243     } else {
3244       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3245       if (iscol_local) {
3246         sameRowDist  = PETSC_TRUE;
3247         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3248       }
3249     }
3250   } else {
3251     /* Check if isrow has same processor distribution as mat */
3252     sameDist[0] = PETSC_FALSE;
3253     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3254     if (!n) {
3255       sameDist[0] = PETSC_TRUE;
3256     } else {
3257       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3258       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3259       if (i >= start && j < end) {
3260         sameDist[0] = PETSC_TRUE;
3261       }
3262     }
3263 
3264     /* Check if iscol has same processor distribution as mat */
3265     sameDist[1] = PETSC_FALSE;
3266     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3267     if (!n) {
3268       sameDist[1] = PETSC_TRUE;
3269     } else {
3270       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3271       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3272       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3273     }
3274 
3275     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3276     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3277     sameRowDist = tsameDist[0];
3278   }
3279 
3280   if (sameRowDist) {
3281     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3282       /* isrow and iscol have same processor distribution as mat */
3283       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3284       PetscFunctionReturn(0);
3285     } else { /* sameRowDist */
3286       /* isrow has same processor distribution as mat */
3287       if (call == MAT_INITIAL_MATRIX) {
3288         PetscBool sorted;
3289         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3290         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3291         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3292         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3293 
3294         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3295         if (sorted) {
3296           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3297           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3298           PetscFunctionReturn(0);
3299         }
3300       } else { /* call == MAT_REUSE_MATRIX */
3301         IS    iscol_sub;
3302         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3303         if (iscol_sub) {
3304           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3305           PetscFunctionReturn(0);
3306         }
3307       }
3308     }
3309   }
3310 
3311   /* General case: iscol -> iscol_local which has global size of iscol */
3312   if (call == MAT_REUSE_MATRIX) {
3313     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3314     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3315   } else {
3316     if (!iscol_local) {
3317       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3318     }
3319   }
3320 
3321   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3322   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3323 
3324   if (call == MAT_INITIAL_MATRIX) {
3325     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3326     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3327   }
3328   PetscFunctionReturn(0);
3329 }
3330 
3331 /*@C
3332      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3333          and "off-diagonal" parts of the matrix in CSR format.
3334 
3335    Collective
3336 
3337    Input Parameters:
3338 +  comm - MPI communicator
3339 .  A - "diagonal" portion of matrix
3340 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3341 -  garray - global index of B columns
3342 
3343    Output Parameter:
3344 .   mat - the matrix, with input A as its local diagonal matrix
3345    Level: advanced
3346 
3347    Notes:
3348        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3349        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3350 
3351 .seealso: MatCreateMPIAIJWithSplitArrays()
3352 @*/
3353 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3354 {
3355   PetscErrorCode ierr;
3356   Mat_MPIAIJ     *maij;
3357   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3358   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3359   PetscScalar    *oa=b->a;
3360   Mat            Bnew;
3361   PetscInt       m,n,N;
3362 
3363   PetscFunctionBegin;
3364   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3365   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3366   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3367   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3368   /* the check below is removed; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3369   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3370 
3371   /* Get global columns of mat */
3372   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3373 
3374   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3375   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3376   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3377   maij = (Mat_MPIAIJ*)(*mat)->data;
3378 
3379   (*mat)->preallocated = PETSC_TRUE;
3380 
3381   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3382   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3383 
3384   /* Set A as diagonal portion of *mat */
3385   maij->A = A;
3386 
3387   nz = oi[m];
3388   for (i=0; i<nz; i++) {
3389     col   = oj[i];
3390     oj[i] = garray[col];
3391   }
3392 
3393   /* Set Bnew as off-diagonal portion of *mat */
3394   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3395   bnew        = (Mat_SeqAIJ*)Bnew->data;
3396   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3397   maij->B     = Bnew;
3398 
3399   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3400 
3401   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3402   b->free_a       = PETSC_FALSE;
3403   b->free_ij      = PETSC_FALSE;
3404   ierr = MatDestroy(&B);CHKERRQ(ierr);
3405 
3406   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3407   bnew->free_a       = PETSC_TRUE;
3408   bnew->free_ij      = PETSC_TRUE;
3409 
3410   /* condense columns of maij->B */
3411   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3412   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3413   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3414   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3415   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3416   PetscFunctionReturn(0);
3417 }
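
/*
   Illustrative call-pattern sketch (not part of the original source), assuming each rank already
   owns a sequential diagonal block Aloc, a sequential off-diagonal block Bloc whose columns are
   numbered 0..nb-1, and an array garray[] of length nb mapping those local columns to global
   column indices; the names Aloc, Bloc, garray and C are assumptions of this example.

     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);

   After the call, Aloc is owned by C and Bloc has been destroyed, so neither may be used again,
   as stated in the Notes of the manual page above.
*/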
3418 
3419 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3420 
3421 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3422 {
3423   PetscErrorCode ierr;
3424   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3425   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3426   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3427   Mat            M,Msub,B=a->B;
3428   MatScalar      *aa;
3429   Mat_SeqAIJ     *aij;
3430   PetscInt       *garray = a->garray,*colsub,Ncols;
3431   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3432   IS             iscol_sub,iscmap;
3433   const PetscInt *is_idx,*cmap;
3434   PetscBool      allcolumns=PETSC_FALSE;
3435   MPI_Comm       comm;
3436 
3437   PetscFunctionBegin;
3438   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3439 
3440   if (call == MAT_REUSE_MATRIX) {
3441     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3442     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3443     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3444 
3445     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3446     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3447 
3448     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3449     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3450 
3451     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3452 
3453   } else { /* call == MAT_INITIAL_MATRIX */
3454     PetscBool flg;
3455 
3456     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3457     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3458 
3459     /* (1) iscol -> nonscalable iscol_local */
3460     /* Check for special case: each processor gets entire matrix columns */
3461     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3462     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3463     if (allcolumns) {
3464       iscol_sub = iscol_local;
3465       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3466       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3467 
3468     } else {
3469       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3470       PetscInt *idx,*cmap1,k;
3471       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3472       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3473       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3474       count = 0;
3475       k     = 0;
3476       for (i=0; i<Ncols; i++) {
3477         j = is_idx[i];
3478         if (j >= cstart && j < cend) {
3479           /* diagonal part of mat */
3480           idx[count]     = j;
3481           cmap1[count++] = i; /* column index in submat */
3482         } else if (Bn) {
3483           /* off-diagonal part of mat */
3484           if (j == garray[k]) {
3485             idx[count]     = j;
3486             cmap1[count++] = i;  /* column index in submat */
3487           } else if (j > garray[k]) {
3488             while (j > garray[k] && k < Bn-1) k++;
3489             if (j == garray[k]) {
3490               idx[count]     = j;
3491               cmap1[count++] = i; /* column index in submat */
3492             }
3493           }
3494         }
3495       }
3496       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3497 
3498       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3499       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3500       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3501 
3502       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3503     }
3504 
3505     /* (3) Create sequential Msub */
3506     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3507   }
3508 
3509   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3510   aij  = (Mat_SeqAIJ*)(Msub)->data;
3511   ii   = aij->i;
3512   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3513 
3514   /*
3515       m - number of local rows
3516       Ncols - number of columns (same on all processors)
3517       rstart - first row in new global matrix generated
3518   */
3519   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3520 
3521   if (call == MAT_INITIAL_MATRIX) {
3522     /* (4) Create parallel newmat */
3523     PetscMPIInt    rank,size;
3524     PetscInt       csize;
3525 
3526     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3527     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3528 
3529     /*
3530         Determine the number of non-zeros in the diagonal and off-diagonal
3531         portions of the matrix in order to do correct preallocation
3532     */
3533 
3534     /* first get start and end of "diagonal" columns */
3535     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3536     if (csize == PETSC_DECIDE) {
3537       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3538       if (mglobal == Ncols) { /* square matrix */
3539         nlocal = m;
3540       } else {
3541         nlocal = Ncols/size + ((Ncols % size) > rank);
3542       }
3543     } else {
3544       nlocal = csize;
3545     }
3546     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3547     rstart = rend - nlocal;
3548     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3549 
3550     /* next, compute all the lengths */
3551     jj    = aij->j;
3552     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3553     olens = dlens + m;
3554     for (i=0; i<m; i++) {
3555       jend = ii[i+1] - ii[i];
3556       olen = 0;
3557       dlen = 0;
3558       for (j=0; j<jend; j++) {
3559         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3560         else dlen++;
3561         jj++;
3562       }
3563       olens[i] = olen;
3564       dlens[i] = dlen;
3565     }
3566 
3567     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3568     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3569 
3570     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3571     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3572     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3573     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3574     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3575     ierr = PetscFree(dlens);CHKERRQ(ierr);
3576 
3577   } else { /* call == MAT_REUSE_MATRIX */
3578     M    = *newmat;
3579     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3580     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3581     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3582     /*
3583          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3584        rather than the slower MatSetValues().
3585     */
3586     M->was_assembled = PETSC_TRUE;
3587     M->assembled     = PETSC_FALSE;
3588   }
3589 
3590   /* (5) Set values of Msub to *newmat */
3591   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3592   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3593 
3594   jj   = aij->j;
3595   aa   = aij->a;
3596   for (i=0; i<m; i++) {
3597     row = rstart + i;
3598     nz  = ii[i+1] - ii[i];
3599     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3600     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3601     jj += nz; aa += nz;
3602   }
3603   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3604 
3605   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607 
3608   ierr = PetscFree(colsub);CHKERRQ(ierr);
3609 
3610   /* save Msub, iscol_sub and iscmap used in processor for next request */
3611   if (call ==  MAT_INITIAL_MATRIX) {
3612     *newmat = M;
3613     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3614     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3615 
3616     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3617     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3618 
3619     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3620     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3621 
3622     if (iscol_local) {
3623       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3624       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3625     }
3626   }
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 /*
3631     Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
3632   and then the end result by concatenating the local matrices.
3633   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3634 
3635   Note: This requires a sequential iscol with all indices.
3636 */
3637 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3638 {
3639   PetscErrorCode ierr;
3640   PetscMPIInt    rank,size;
3641   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3642   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3643   Mat            M,Mreuse;
3644   MatScalar      *aa,*vwork;
3645   MPI_Comm       comm;
3646   Mat_SeqAIJ     *aij;
3647   PetscBool      colflag,allcolumns=PETSC_FALSE;
3648 
3649   PetscFunctionBegin;
3650   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3651   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3652   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3653 
3654   /* Check for special case: each processor gets entire matrix columns */
3655   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3656   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3657   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3658 
3659   if (call ==  MAT_REUSE_MATRIX) {
3660     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3661     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3662     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3663   } else {
3664     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3665   }
3666 
3667   /*
3668       m - number of local rows
3669       n - number of columns (same on all processors)
3670       rstart - first row in new global matrix generated
3671   */
3672   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3673   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3674   if (call == MAT_INITIAL_MATRIX) {
3675     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3676     ii  = aij->i;
3677     jj  = aij->j;
3678 
3679     /*
3680         Determine the number of non-zeros in the diagonal and off-diagonal
3681         portions of the matrix in order to do correct preallocation
3682     */
3683 
3684     /* first get start and end of "diagonal" columns */
3685     if (csize == PETSC_DECIDE) {
3686       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3687       if (mglobal == n) { /* square matrix */
3688         nlocal = m;
3689       } else {
3690         nlocal = n/size + ((n % size) > rank);
3691       }
3692     } else {
3693       nlocal = csize;
3694     }
3695     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3696     rstart = rend - nlocal;
3697     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3698 
3699     /* next, compute all the lengths */
3700     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3701     olens = dlens + m;
3702     for (i=0; i<m; i++) {
3703       jend = ii[i+1] - ii[i];
3704       olen = 0;
3705       dlen = 0;
3706       for (j=0; j<jend; j++) {
3707         if (*jj < rstart || *jj >= rend) olen++;
3708         else dlen++;
3709         jj++;
3710       }
3711       olens[i] = olen;
3712       dlens[i] = dlen;
3713     }
3714     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3715     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3716     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3717     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3718     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3719     ierr = PetscFree(dlens);CHKERRQ(ierr);
3720   } else {
3721     PetscInt ml,nl;
3722 
3723     M    = *newmat;
3724     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3725     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3726     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3727     /*
3728          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3729        rather than the slower MatSetValues().
3730     */
3731     M->was_assembled = PETSC_TRUE;
3732     M->assembled     = PETSC_FALSE;
3733   }
3734   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3735   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3736   ii   = aij->i;
3737   jj   = aij->j;
3738   aa   = aij->a;
3739   for (i=0; i<m; i++) {
3740     row   = rstart + i;
3741     nz    = ii[i+1] - ii[i];
3742     cwork = jj;     jj += nz;
3743     vwork = aa;     aa += nz;
3744     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3745   }
3746 
3747   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3748   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3749   *newmat = M;
3750 
3751   /* save submatrix used in processor for next request */
3752   if (call ==  MAT_INITIAL_MATRIX) {
3753     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3754     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3755   }
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3760 {
3761   PetscInt       m,cstart, cend,j,nnz,i,d;
3762   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3763   const PetscInt *JJ;
3764   PetscErrorCode ierr;
3765   PetscBool      nooffprocentries;
3766 
3767   PetscFunctionBegin;
3768   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3769 
3770   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3771   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3772   m      = B->rmap->n;
3773   cstart = B->cmap->rstart;
3774   cend   = B->cmap->rend;
3775   rstart = B->rmap->rstart;
3776 
3777   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3778 
3779   if (PetscDefined(USE_DEBUG)) {
3780     for (i=0; i<m; i++) {
3781       nnz = Ii[i+1]- Ii[i];
3782       JJ  = J + Ii[i];
3783       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3784       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3785       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3786     }
3787   }
3788 
3789   for (i=0; i<m; i++) {
3790     nnz     = Ii[i+1]- Ii[i];
3791     JJ      = J + Ii[i];
3792     nnz_max = PetscMax(nnz_max,nnz);
3793     d       = 0;
3794     for (j=0; j<nnz; j++) {
3795       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3796     }
3797     d_nnz[i] = d;
3798     o_nnz[i] = nnz - d;
3799   }
3800   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3801   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3802 
3803   for (i=0; i<m; i++) {
3804     ii   = i + rstart;
3805     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3806   }
3807   nooffprocentries    = B->nooffprocentries;
3808   B->nooffprocentries = PETSC_TRUE;
3809   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3810   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3811   B->nooffprocentries = nooffprocentries;
3812 
3813   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3814   PetscFunctionReturn(0);
3815 }
3816 
3817 /*@
3818    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3819    (the default parallel PETSc format).
3820 
3821    Collective
3822 
3823    Input Parameters:
3824 +  B - the matrix
3825 .  i - the indices into j for the start of each local row (starts with zero)
3826 .  j - the column indices for each local row (starts with zero)
3827 -  v - optional values in the matrix
3828 
3829    Level: developer
3830 
3831    Notes:
3832        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3833      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3834      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3835 
3836        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3837 
3838        The format used for the sparse matrix input is equivalent to a row-major
3839     ordering; i.e., for the following matrix, the expected input data is
3840     as shown:
3841 
3842 $        1 0 0
3843 $        2 0 3     P0
3844 $       -------
3845 $        4 5 6     P1
3846 $
3847 $     Process0 [P0]: rows_owned=[0,1]
3848 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3849 $        j =  {0,0,2}  [size = 3]
3850 $        v =  {1,2,3}  [size = 3]
3851 $
3852 $     Process1 [P1]: rows_owned=[2]
3853 $        i =  {0,3}    [size = nrow+1  = 1+1]
3854 $        j =  {0,1,2}  [size = 3]
3855 $        v =  {4,5,6}  [size = 3]
3856 
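       As a minimal, illustrative sketch (not taken from an existing PETSc example), process 0 of the layout
     above could create and fill its two rows as follows; process 1 would pass its own i, j, and v arrays and
     its own local row count.

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,PETSC_DETERMINE,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
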
3857 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3858           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3859 @*/
3860 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3861 {
3862   PetscErrorCode ierr;
3863 
3864   PetscFunctionBegin;
3865   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3866   PetscFunctionReturn(0);
3867 }
3868 
3869 /*@C
3870    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3871    (the default parallel PETSc format).  For good matrix assembly performance
3872    the user should preallocate the matrix storage by setting the parameters
3873    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3874    performance can be increased by more than a factor of 50.
3875 
3876    Collective
3877 
3878    Input Parameters:
3879 +  B - the matrix
3880 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3881            (same value is used for all local rows)
3882 .  d_nnz - array containing the number of nonzeros in the various rows of the
3883            DIAGONAL portion of the local submatrix (possibly different for each row)
3884            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3885            The size of this array is equal to the number of local rows, i.e 'm'.
3886            For matrices that will be factored, you must leave room for (and set)
3887            the diagonal entry even if it is zero.
3888 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3889            submatrix (same value is used for all local rows).
3890 -  o_nnz - array containing the number of nonzeros in the various rows of the
3891            OFF-DIAGONAL portion of the local submatrix (possibly different for
3892            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3893            structure. The size of this array is equal to the number
3894            of local rows, i.e 'm'.
3895 
3896    If the *_nnz parameter is given then the *_nz parameter is ignored
3897 
3898    The AIJ format (also called the Yale sparse matrix format or
3899    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3900    storage.  The stored row and column indices begin with zero.
3901    See Users-Manual: ch_mat for details.
3902 
3903    The parallel matrix is partitioned such that the first m0 rows belong to
3904    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3905    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3906 
3907    The DIAGONAL portion of the local submatrix of a processor can be defined
3908    as the submatrix obtained by extracting the part corresponding to
3909    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3910    first row that belongs to the processor, r2 is the last row belonging to
3911    this processor, and c1-c2 is the range of indices of the local part of a
3912    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3913    common case of a square matrix, the row and column ranges are the same and
3914    the DIAGONAL part is also square. The remaining portion of the local
3915    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3916 
3917    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3918 
3919    You can call MatGetInfo() to get information on how effective the preallocation was;
3920    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3921    You can also run with the option -info and look for messages with the string
3922    malloc in them to see if additional memory allocation was needed.
3923 
3924    Example usage:
3925 
3926    Consider the following 8x8 matrix with 34 non-zero values, that is
3927    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3928    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3929    as follows:
3930 
3931 .vb
3932             1  2  0  |  0  3  0  |  0  4
3933     Proc0   0  5  6  |  7  0  0  |  8  0
3934             9  0 10  | 11  0  0  | 12  0
3935     -------------------------------------
3936            13  0 14  | 15 16 17  |  0  0
3937     Proc1   0 18  0  | 19 20 21  |  0  0
3938             0  0  0  | 22 23  0  | 24  0
3939     -------------------------------------
3940     Proc2  25 26 27  |  0  0 28  | 29  0
3941            30  0  0  | 31 32 33  |  0 34
3942 .ve
3943 
3944    This can be represented as a collection of submatrices as:
3945 
3946 .vb
3947       A B C
3948       D E F
3949       G H I
3950 .ve
3951 
3952    Where the submatrices A,B,C are owned by proc0, D,E,F are
3953    owned by proc1, G,H,I are owned by proc2.
3954 
3955    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3956    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3957    The 'M','N' parameters are 8,8, and have the same values on all procs.
3958 
3959    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3960    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3961    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3962    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3963    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3964    matrix, and [DF] as another SeqAIJ matrix.
3965 
3966    When d_nz, o_nz parameters are specified, d_nz storage elements are
3967    allocated for every row of the local diagonal submatrix, and o_nz
3968    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3969    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3970    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3971    In this case, the values of d_nz,o_nz are:
3972 .vb
3973      proc0 : dnz = 2, o_nz = 2
3974      proc1 : dnz = 3, o_nz = 2
3975      proc2 : dnz = 1, o_nz = 4
3976 .ve
3977    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3978    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3979    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3980    34 values.
3981 
3982    When d_nnz, o_nnz parameters are specified, the storage is specified
3983    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3984    In the above case the values for d_nnz,o_nnz are:
3985 .vb
3986      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3987      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3988      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3989 .ve
3990    Here the space allocated is sum of all the above values i.e 34, and
3991    hence pre-allocation is perfect.
3992 
3993    Level: intermediate
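   As a minimal, illustrative sketch (not taken from an existing PETSc example), proc1 of the example above
   could preallocate its rows with

.vb
     Mat      B;
     PetscInt d_nnz[] = {3,3,2};
     PetscInt o_nnz[] = {2,1,1};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,3,3,8,8);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
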
3994 
3995 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3996           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3997 @*/
3998 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3999 {
4000   PetscErrorCode ierr;
4001 
4002   PetscFunctionBegin;
4003   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4004   PetscValidType(B,1);
4005   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4006   PetscFunctionReturn(0);
4007 }
4008 
4009 /*@
4010      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4011          CSR format.
4012 
4013    Collective
4014 
4015    Input Parameters:
4016 +  comm - MPI communicator
4017 .  m - number of local rows (Cannot be PETSC_DECIDE)
4018 .  n - This value should be the same as the local size used in creating the
4019        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4020        calculated if N is given) For square matrices n is almost always m.
4021 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4022 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4023 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4024 .   j - column indices
4025 -   a - matrix values
4026 
4027    Output Parameter:
4028 .   mat - the matrix
4029 
4030    Level: intermediate
4031 
4032    Notes:
4033        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4034      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4035      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4036 
4037        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4038 
4039        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4040 
4041        The format used for the sparse matrix input is equivalent to a row-major
4042     ordering; i.e., for the following matrix, the expected input data is
4043     as shown:
4044 
4045 $        1 0 0
4046 $        2 0 3     P0
4047 $       -------
4048 $        4 5 6     P1
4049 $
4050 $     Process0 [P0]: rows_owned=[0,1]
4051 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4052 $        j =  {0,0,2}  [size = 3]
4053 $        v =  {1,2,3}  [size = 3]
4054 $
4055 $     Process1 [P1]: rows_owned=[2]
4056 $        i =  {0,3}    [size = nrow+1  = 1+1]
4057 $        j =  {0,1,2}  [size = 3]
4058 $        v =  {4,5,6}  [size = 3]
4059 
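       As a minimal, illustrative sketch (not taken from an existing PETSc example), process 0 of the layout
     above could create the matrix with the call below; process 1 would pass m = 1 and its own arrays.

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
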
4060 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4061           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4062 @*/
4063 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4064 {
4065   PetscErrorCode ierr;
4066 
4067   PetscFunctionBegin;
4068   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4069   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4070   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4071   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4072   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4073   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4074   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4075   PetscFunctionReturn(0);
4076 }
4077 
4078 /*@
4079      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4080          CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix
4081 
4082    Collective
4083 
4084    Input Parameters:
4085 +  mat - the matrix
4086 .  m - number of local rows (Cannot be PETSC_DECIDE)
4087 .  n - This value should be the same as the local size used in creating the
4088        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4089        calculated if N is given) For square matrices n is almost always m.
4090 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4091 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4092 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4093 .  J - column indices
4094 -  v - matrix values
4095 
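   Notes:
     The sparsity pattern (the i and j arrays) must be identical to the one used when the matrix was created;
     only the numerical values are replaced. The local sizes m and n must also match those of the matrix.

     As a minimal, illustrative sketch (assuming A, i, and j are those of the MatCreateMPIAIJWithArrays()
     example, and vnew holds the new values):

.vb
     PetscInt    mloc,nloc;
     PetscScalar vnew[] = {10.0,20.0,30.0};

     MatGetLocalSize(A,&mloc,&nloc);
     MatUpdateMPIAIJWithArrays(A,mloc,nloc,PETSC_DETERMINE,3,i,j,vnew);
.ve
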
4096    Level: intermediate
4097 
4098 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4099           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4100 @*/
4101 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4102 {
4103   PetscErrorCode ierr;
4104   PetscInt       cstart,nnz,i,j;
4105   PetscInt       *ld;
4106   PetscBool      nooffprocentries;
4107   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4108   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4109   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4110   const PetscInt *Adi = Ad->i;
4111   PetscInt       ldi,Iii,md;
4112 
4113   PetscFunctionBegin;
4114   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4115   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4116   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4117   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4118 
4119   cstart = mat->cmap->rstart;
4120   if (!Aij->ld) {
4121     /* count number of entries below block diagonal */
4122     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4123     Aij->ld = ld;
4124     for (i=0; i<m; i++) {
4125       nnz  = Ii[i+1]- Ii[i];
4126       j     = 0;
4127       while (j < nnz && J[j] < cstart) {j++;}
4128       J    += nnz;
4129       ld[i] = j;
4130     }
4131   } else {
4132     ld = Aij->ld;
4133   }
4134 
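4134   /* Each local row of v is laid out as [entries left of the diagonal block | diagonal block entries | entries right of the diagonal block];
4134      copy the first ld[i] values into the off-diagonal part B, the next Adi[i+1]-Adi[i] values into the diagonal part A, and the remainder into B */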
4135   for (i=0; i<m; i++) {
4136     nnz  = Ii[i+1]- Ii[i];
4137     Iii  = Ii[i];
4138     ldi  = ld[i];
4139     md   = Adi[i+1]-Adi[i];
4140     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4141     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4142     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4143     ad  += md;
4144     ao  += nnz - md;
4145   }
4146   nooffprocentries      = mat->nooffprocentries;
4147   mat->nooffprocentries = PETSC_TRUE;
4148   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4149   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4150   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4151   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4152   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4153   mat->nooffprocentries = nooffprocentries;
4154   PetscFunctionReturn(0);
4155 }
4156 
4157 /*@C
4158    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4159    (the default parallel PETSc format).  For good matrix assembly performance
4160    the user should preallocate the matrix storage by setting the parameters
4161    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4162    performance can be increased by more than a factor of 50.
4163 
4164    Collective
4165 
4166    Input Parameters:
4167 +  comm - MPI communicator
4168 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4169            This value should be the same as the local size used in creating the
4170            y vector for the matrix-vector product y = Ax.
4171 .  n - This value should be the same as the local size used in creating the
4172        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4173        calculated if N is given) For square matrices n is almost always m.
4174 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4175 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4176 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4177            (same value is used for all local rows)
4178 .  d_nnz - array containing the number of nonzeros in the various rows of the
4179            DIAGONAL portion of the local submatrix (possibly different for each row)
4180            or NULL, if d_nz is used to specify the nonzero structure.
4181            The size of this array is equal to the number of local rows, i.e 'm'.
4182 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4183            submatrix (same value is used for all local rows).
4184 -  o_nnz - array containing the number of nonzeros in the various rows of the
4185            OFF-DIAGONAL portion of the local submatrix (possibly different for
4186            each row) or NULL, if o_nz is used to specify the nonzero
4187            structure. The size of this array is equal to the number
4188            of local rows, i.e 'm'.
4189 
4190    Output Parameter:
4191 .  A - the matrix
4192 
4193    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4194    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4195    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4196 
4197    Notes:
4198    If the *_nnz parameter is given then the *_nz parameter is ignored
4199 
4200    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4201    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4202    storage requirements for this matrix.
4203 
4204    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4205    processor then it must be used on all processors that share the object for
4206    that argument.
4207 
4208    The user MUST specify either the local or global matrix dimensions
4209    (possibly both).
4210 
4211    The parallel matrix is partitioned across processors such that the
4212    first m0 rows belong to process 0, the next m1 rows belong to
4213    process 1, the next m2 rows belong to process 2, etc., where
4214    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4215    values corresponding to an [m x N] submatrix.
4216 
4217    The columns are logically partitioned with the n0 columns belonging
4218    to 0th partition, the next n1 columns belonging to the next
4219    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4220 
4221    The DIAGONAL portion of the local submatrix on any given processor
4222    is the submatrix corresponding to the rows and columns m,n
4223    corresponding to the given processor, i.e., the diagonal matrix on
4224    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4225    etc. The remaining portion of the local submatrix [m x (N-n)]
4226    constitute the OFF-DIAGONAL portion. The example below better
4227    illustrates this concept.
4228 
4229    For a square global matrix we define each processor's diagonal portion
4230    to be its local rows and the corresponding columns (a square submatrix);
4231    each processor's off-diagonal portion encompasses the remainder of the
4232    local matrix (a rectangular submatrix).
4233 
4234    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4235 
4236    When calling this routine with a single process communicator, a matrix of
4237    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4238    type of communicator, use the construction mechanism
4239 .vb
4240      MatCreate(...,&A);
4241      MatSetType(A,MATMPIAIJ);
4242      MatSetSizes(A, m,n,M,N);
4243      MatMPIAIJSetPreallocation(A,...);
4244 .ve
4245 
4248    By default, this format uses inodes (identical nodes) when possible.
4249    We search for consecutive rows with the same nonzero structure, thereby
4250    reusing matrix information to achieve increased efficiency.
4251 
4252    Options Database Keys:
4253 +  -mat_no_inode  - Do not use inodes
4254 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4255 
4258    Example usage:
4259 
4260    Consider the following 8x8 matrix with 34 non-zero values, that is
4261    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4262    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4263    as follows
4264 
4265 .vb
4266             1  2  0  |  0  3  0  |  0  4
4267     Proc0   0  5  6  |  7  0  0  |  8  0
4268             9  0 10  | 11  0  0  | 12  0
4269     -------------------------------------
4270            13  0 14  | 15 16 17  |  0  0
4271     Proc1   0 18  0  | 19 20 21  |  0  0
4272             0  0  0  | 22 23  0  | 24  0
4273     -------------------------------------
4274     Proc2  25 26 27  |  0  0 28  | 29  0
4275            30  0  0  | 31 32 33  |  0 34
4276 .ve
4277 
4278    This can be represented as a collection of submatrices as
4279 
4280 .vb
4281       A B C
4282       D E F
4283       G H I
4284 .ve
4285 
4286    Where the submatrices A,B,C are owned by proc0, D,E,F are
4287    owned by proc1, G,H,I are owned by proc2.
4288 
4289    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4290    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4291    The 'M','N' parameters are 8,8, and have the same values on all procs.
4292 
4293    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4294    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4295    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4296    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4297    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4298    matrix, and [DF] as another SeqAIJ matrix.
4299 
4300    When d_nz, o_nz parameters are specified, d_nz storage elements are
4301    allocated for every row of the local diagonal submatrix, and o_nz
4302    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4303    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4304    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4305    In this case, the values of d_nz,o_nz are
4306 .vb
4307      proc0 : dnz = 2, o_nz = 2
4308      proc1 : dnz = 3, o_nz = 2
4309      proc2 : dnz = 1, o_nz = 4
4310 .ve
4311    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4312    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4313    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4314    34 values.
4315 
4316    When d_nnz, o_nnz parameters are specified, the storage is specified
4317    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4318    In the above case the values for d_nnz,o_nnz are
4319 .vb
4320      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4321      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4322      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4323 .ve
4324    Here the space allocated is the sum of all the above values, i.e., 34, and
4325    hence the preallocation is perfect.
4326 
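   As a minimal, illustrative sketch (not taken from an existing PETSc example), proc1 of the example above
   could create the matrix directly with

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2};
     PetscInt o_nnz[] = {2,1,1};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
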
4327    Level: intermediate
4328 
4329 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4330           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4331 @*/
4332 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4333 {
4334   PetscErrorCode ierr;
4335   PetscMPIInt    size;
4336 
4337   PetscFunctionBegin;
4338   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4339   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4340   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4341   if (size > 1) {
4342     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4343     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4344   } else {
4345     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4346     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4347   }
4348   PetscFunctionReturn(0);
4349 }
4350 
4351 /*@C
4352   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4353 
4354   Not collective
4355 
4356   Input Parameter:
4357 . A - The MPIAIJ matrix
4358 
4359   Output Parameters:
4360 + Ad - The local diagonal block as a SeqAIJ matrix
4361 . Ao - The local off-diagonal block as a SeqAIJ matrix
4362 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4363 
4364   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4365   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4366   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4367   local column numbers to global column numbers in the original matrix.
4368 
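  A minimal usage sketch; the returned Ad, Ao, and colmap are internal to A (no copies are made) and must not
  be destroyed or freed by the caller.

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve
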
4369   Level: intermediate
4370 
4371 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4372 @*/
4373 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4374 {
4375   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4376   PetscBool      flg;
4377   PetscErrorCode ierr;
4378 
4379   PetscFunctionBegin;
4380   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4381   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4382   if (Ad)     *Ad     = a->A;
4383   if (Ao)     *Ao     = a->B;
4384   if (colmap) *colmap = a->garray;
4385   PetscFunctionReturn(0);
4386 }
4387 
4388 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4389 {
4390   PetscErrorCode ierr;
4391   PetscInt       m,N,i,rstart,nnz,Ii;
4392   PetscInt       *indx;
4393   PetscScalar    *values;
4394 
4395   PetscFunctionBegin;
4396   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4397   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4398     PetscInt       *dnz,*onz,sum,bs,cbs;
4399 
4400     if (n == PETSC_DECIDE) {
4401       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4402     }
4403     /* Check sum(n) = N */
4404     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4405     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4406 
4407     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4408     rstart -= m;
4409 
4410     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4411     for (i=0; i<m; i++) {
4412       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4413       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4414       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4415     }
4416 
4417     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4418     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4419     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4420     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4421     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4422     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4423     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4424     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4425   }
4426 
4427   /* numeric phase */
4428   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4429   for (i=0; i<m; i++) {
4430     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4431     Ii   = i + rstart;
4432     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4433     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4434   }
4435   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4436   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4437   PetscFunctionReturn(0);
4438 }
4439 
4440 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4441 {
4442   PetscErrorCode    ierr;
4443   PetscMPIInt       rank;
4444   PetscInt          m,N,i,rstart,nnz;
4445   size_t            len;
4446   const PetscInt    *indx;
4447   PetscViewer       out;
4448   char              *name;
4449   Mat               B;
4450   const PetscScalar *values;
4451 
4452   PetscFunctionBegin;
4453   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4454   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4455   /* Should this be the type of the diagonal block of A? */
4456   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4457   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4458   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4459   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4460   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4461   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4462   for (i=0; i<m; i++) {
4463     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4464     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4465     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4466   }
4467   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4468   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4469 
4470   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4471   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4472   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4473   sprintf(name,"%s.%d",outfile,rank);
4474   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4475   ierr = PetscFree(name);CHKERRQ(ierr);
4476   ierr = MatView(B,out);CHKERRQ(ierr);
4477   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4478   ierr = MatDestroy(&B);CHKERRQ(ierr);
4479   PetscFunctionReturn(0);
4480 }
4481 
4482 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4483 {
4484   PetscErrorCode      ierr;
4485   Mat_Merge_SeqsToMPI *merge;
4486   PetscContainer      container;
4487 
4488   PetscFunctionBegin;
4489   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4490   if (container) {
4491     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4504     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4505     ierr = PetscFree(merge);CHKERRQ(ierr);
4506     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4507   }
4508   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #include <../src/mat/utils/freespace.h>
4513 #include <petscbt.h>
4514 
4515 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4516 {
4517   PetscErrorCode      ierr;
4518   MPI_Comm            comm;
4519   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4520   PetscMPIInt         size,rank,taga,*len_s;
4521   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4522   PetscInt            proc,m;
4523   PetscInt            **buf_ri,**buf_rj;
4524   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4525   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4526   MPI_Request         *s_waits,*r_waits;
4527   MPI_Status          *status;
4528   MatScalar           *aa=a->a;
4529   MatScalar           **abuf_r,*ba_i;
4530   Mat_Merge_SeqsToMPI *merge;
4531   PetscContainer      container;
4532 
4533   PetscFunctionBegin;
4534   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4535   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4536 
4537   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4538   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4539 
4540   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4541   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4542 
4543   bi     = merge->bi;
4544   bj     = merge->bj;
4545   buf_ri = merge->buf_ri;
4546   buf_rj = merge->buf_rj;
4547 
4548   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4549   owners = merge->rowmap->range;
4550   len_s  = merge->len_s;
4551 
4552   /* send and recv matrix values */
4553   /*-----------------------------*/
4554   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4555   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4556 
4557   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4558   for (proc=0,k=0; proc<size; proc++) {
4559     if (!len_s[proc]) continue;
4560     i    = owners[proc];
4561     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4562     k++;
4563   }
4564 
4565   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4566   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4567   ierr = PetscFree(status);CHKERRQ(ierr);
4568 
4569   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4570   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4571 
4572   /* insert mat values of mpimat */
4573   /*----------------------------*/
4574   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4575   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4576 
4577   for (k=0; k<merge->nrecv; k++) {
4578     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4579     nrows       = *(buf_ri_k[k]);
4580     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4581     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4582   }
4583 
4584   /* set values of ba */
4585   m = merge->rowmap->n;
4586   for (i=0; i<m; i++) {
4587     arow = owners[rank] + i;
4588     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4589     bnzi = bi[i+1] - bi[i];
4590     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4591 
4592     /* add local non-zero vals of this proc's seqmat into ba */
4593     anzi   = ai[arow+1] - ai[arow];
4594     aj     = a->j + ai[arow];
4595     aa     = a->a + ai[arow];
4596     nextaj = 0;
4597     for (j=0; nextaj<anzi; j++) {
4598       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4599         ba_i[j] += aa[nextaj++];
4600       }
4601     }
4602 
4603     /* add received vals into ba */
4604     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4605       /* i-th row */
4606       if (i == *nextrow[k]) {
4607         anzi   = *(nextai[k]+1) - *nextai[k];
4608         aj     = buf_rj[k] + *(nextai[k]);
4609         aa     = abuf_r[k] + *(nextai[k]);
4610         nextaj = 0;
4611         for (j=0; nextaj<anzi; j++) {
4612           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4613             ba_i[j] += aa[nextaj++];
4614           }
4615         }
4616         nextrow[k]++; nextai[k]++;
4617       }
4618     }
4619     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4620   }
4621   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4622   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4623 
4624   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4625   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4626   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4627   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4628   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4629   PetscFunctionReturn(0);
4630 }
4631 
4632 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4633 {
4634   PetscErrorCode      ierr;
4635   Mat                 B_mpi;
4636   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4637   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4638   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4639   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4640   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4641   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4642   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4643   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4644   MPI_Status          *status;
4645   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4646   PetscBT             lnkbt;
4647   Mat_Merge_SeqsToMPI *merge;
4648   PetscContainer      container;
4649 
4650   PetscFunctionBegin;
4651   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4652 
4653   /* make sure it is a PETSc comm */
4654   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4655   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4656   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4657 
4658   ierr = PetscNew(&merge);CHKERRQ(ierr);
4659   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4660 
4661   /* determine row ownership */
4662   /*---------------------------------------------------------*/
4663   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4664   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4665   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4666   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4667   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4668   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4669   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4670 
4671   m      = merge->rowmap->n;
4672   owners = merge->rowmap->range;
4673 
4674   /* determine the number of messages to send, their lengths */
4675   /*---------------------------------------------------------*/
4676   len_s = merge->len_s;
4677 
4678   len          = 0; /* length of buf_si[] */
4679   merge->nsend = 0;
4680   for (proc=0; proc<size; proc++) {
4681     len_si[proc] = 0;
4682     if (proc == rank) {
4683       len_s[proc] = 0;
4684     } else {
4685       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4686       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4687     }
4688     if (len_s[proc]) {
4689       merge->nsend++;
4690       nrows = 0;
4691       for (i=owners[proc]; i<owners[proc+1]; i++) {
4692         if (ai[i+1] > ai[i]) nrows++;
4693       }
4694       len_si[proc] = 2*(nrows+1);
4695       len         += len_si[proc];
4696     }
4697   }
4698 
4699   /* determine the number and length of messages to receive for ij-structure */
4700   /*-------------------------------------------------------------------------*/
4701   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4702   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4703 
4704   /* post the Irecv of j-structure */
4705   /*-------------------------------*/
4706   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4707   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4708 
4709   /* post the Isend of j-structure */
4710   /*--------------------------------*/
4711   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4712 
4713   for (proc=0, k=0; proc<size; proc++) {
4714     if (!len_s[proc]) continue;
4715     i    = owners[proc];
4716     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4717     k++;
4718   }
4719 
4720   /* receives and sends of j-structure are complete */
4721   /*------------------------------------------------*/
4722   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4723   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4724 
4725   /* send and recv i-structure */
4726   /*---------------------------*/
4727   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4728   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4729 
4730   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4731   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4732   for (proc=0,k=0; proc<size; proc++) {
4733     if (!len_s[proc]) continue;
4734     /* form outgoing message for i-structure:
4735          buf_si[0]:                 nrows to be sent
4736                [1:nrows]:           row index (global)
4737                [nrows+1:2*nrows+1]: i-structure index
4738     */
4739     /*-------------------------------------------*/
4740     nrows       = len_si[proc]/2 - 1;
4741     buf_si_i    = buf_si + nrows+1;
4742     buf_si[0]   = nrows;
4743     buf_si_i[0] = 0;
4744     nrows       = 0;
4745     for (i=owners[proc]; i<owners[proc+1]; i++) {
4746       anzi = ai[i+1] - ai[i];
4747       if (anzi) {
4748         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4749         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4750         nrows++;
4751       }
4752     }
4753     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4754     k++;
4755     buf_si += len_si[proc];
4756   }
4757 
4758   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4759   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4760 
4761   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4762   for (i=0; i<merge->nrecv; i++) {
4763     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4764   }
4765 
4766   ierr = PetscFree(len_si);CHKERRQ(ierr);
4767   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4768   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4769   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4770   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4771   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4772   ierr = PetscFree(status);CHKERRQ(ierr);
4773 
4774   /* compute a local seq matrix in each processor */
4775   /*----------------------------------------------*/
4776   /* allocate bi array and free space for accumulating nonzero column info */
4777   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4778   bi[0] = 0;
4779 
4780   /* create and initialize a linked list */
4781   nlnk = N+1;
4782   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4783 
4784   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4785   len  = ai[owners[rank+1]] - ai[owners[rank]];
4786   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4787 
4788   current_space = free_space;
4789 
4790   /* determine symbolic info for each local row */
4791   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4792 
4793   for (k=0; k<merge->nrecv; k++) {
4794     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4795     nrows       = *buf_ri_k[k];
4796     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4797     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4798   }
4799 
4800   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4801   len  = 0;
4802   for (i=0; i<m; i++) {
4803     bnzi = 0;
4804     /* add local non-zero cols of this proc's seqmat into lnk */
4805     arow  = owners[rank] + i;
4806     anzi  = ai[arow+1] - ai[arow];
4807     aj    = a->j + ai[arow];
4808     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4809     bnzi += nlnk;
4810     /* add received col data into lnk */
4811     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4812       if (i == *nextrow[k]) { /* i-th row */
4813         anzi  = *(nextai[k]+1) - *nextai[k];
4814         aj    = buf_rj[k] + *nextai[k];
4815         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4816         bnzi += nlnk;
4817         nextrow[k]++; nextai[k]++;
4818       }
4819     }
4820     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4821 
4822     /* if free space is not available, make more free space */
4823     if (current_space->local_remaining<bnzi) {
4824       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4825       nspacedouble++;
4826     }
4827     /* copy data into free space, then initialize lnk */
4828     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4829     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4830 
4831     current_space->array           += bnzi;
4832     current_space->local_used      += bnzi;
4833     current_space->local_remaining -= bnzi;
4834 
4835     bi[i+1] = bi[i] + bnzi;
4836   }
4837 
4838   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4839 
4840   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4841   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4842   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4843 
4844   /* create symbolic parallel matrix B_mpi */
4845   /*---------------------------------------*/
4846   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4847   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4848   if (n==PETSC_DECIDE) {
4849     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4850   } else {
4851     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4852   }
4853   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4854   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4855   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4856   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4857   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4858 
4859   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4860   B_mpi->assembled    = PETSC_FALSE;
4861   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4862   merge->bi           = bi;
4863   merge->bj           = bj;
4864   merge->buf_ri       = buf_ri;
4865   merge->buf_rj       = buf_rj;
4866   merge->coi          = NULL;
4867   merge->coj          = NULL;
4868   merge->owners_co    = NULL;
4869 
4870   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4871 
4872   /* attach the supporting struct to B_mpi for reuse */
4873   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4874   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4875   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4876   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4877   *mpimat = B_mpi;
4878 
4879   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4880   PetscFunctionReturn(0);
4881 }
4882 
4883 /*@C
4884       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4885                  matrices from each processor
4886 
4887     Collective
4888 
4889    Input Parameters:
4890 +    comm - the communicator the parallel matrix will live on
4891 .    seqmat - the input sequential matrix (one per process)
4892 .    m - number of local rows (or PETSC_DECIDE)
4893 .    n - number of local columns (or PETSC_DECIDE)
4894 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4895 
4896    Output Parameter:
4897 .    mpimat - the parallel matrix generated
4898 
4899     Level: advanced
4900 
4901    Notes:
4902      The dimensions of the sequential matrix in each processor MUST be the same.
4903      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4904      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
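
     As a minimal, illustrative sketch, each process builds a seqmat of the same dimensions, then the symbolic
     and numeric phases are run with MAT_INITIAL_MATRIX; if the numerical values of seqmat later change while
     its nonzero pattern stays fixed, the call can be repeated with MAT_REUSE_MATRIX.
.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve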
4905 @*/
4906 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4907 {
4908   PetscErrorCode ierr;
4909   PetscMPIInt    size;
4910 
4911   PetscFunctionBegin;
4912   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4913   if (size == 1) {
4914     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4915     if (scall == MAT_INITIAL_MATRIX) {
4916       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4917     } else {
4918       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4919     }
4920     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4921     PetscFunctionReturn(0);
4922   }
4923   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4924   if (scall == MAT_INITIAL_MATRIX) {
4925     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4926   }
4927   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4928   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4929   PetscFunctionReturn(0);
4930 }
4931 
4932 /*@
4933      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4934           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4935           with MatGetSize().
4936 
4937     Not Collective
4938 
4939    Input Parameters:
4940 +    A - the matrix
4941 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4942 
4943    Output Parameter:
4944 .    A_loc - the local sequential matrix generated
4945 
4946     Level: developer
4947 
4948    Notes:
4949      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4950      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4951      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4952      modify the values of the returned A_loc.
4953 
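   A minimal usage sketch: create the local matrix once, refresh its values with MAT_REUSE_MATRIX after the
   values of A change, and destroy it when done.

.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
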
4954 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4955 
4956 @*/
4957 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4958 {
4959   PetscErrorCode ierr;
4960   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4961   Mat_SeqAIJ     *mat,*a,*b;
4962   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4963   MatScalar      *aa,*ba,*cam;
4964   PetscScalar    *ca;
4965   PetscMPIInt    size;
4966   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4967   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4968   PetscBool      match;
4969 
4970   PetscFunctionBegin;
4971   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4972   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4973   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4974   if (size == 1) {
4975     if (scall == MAT_INITIAL_MATRIX) {
4976       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4977       *A_loc = mpimat->A;
4978     } else if (scall == MAT_REUSE_MATRIX) {
4979       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4980     }
4981     PetscFunctionReturn(0);
4982   }
4983 
4984   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4985   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4986   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4987   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4988   aa = a->a; ba = b->a;
4989   if (scall == MAT_INITIAL_MATRIX) {
4990     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4991     ci[0] = 0;
4992     for (i=0; i<am; i++) {
4993       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4994     }
4995     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4996     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4997     k    = 0;
4998     for (i=0; i<am; i++) {
4999       ncols_o = bi[i+1] - bi[i];
5000       ncols_d = ai[i+1] - ai[i];
5001       /* off-diagonal portion of A */
5002       for (jo=0; jo<ncols_o; jo++) {
5003         col = cmap[*bj];
5004         if (col >= cstart) break;
5005         cj[k]   = col; bj++;
5006         ca[k++] = *ba++;
5007       }
5008       /* diagonal portion of A */
5009       for (j=0; j<ncols_d; j++) {
5010         cj[k]   = cstart + *aj++;
5011         ca[k++] = *aa++;
5012       }
5013       /* off-diagonal portion of A */
5014       for (j=jo; j<ncols_o; j++) {
5015         cj[k]   = cmap[*bj++];
5016         ca[k++] = *ba++;
5017       }
5018     }
5019     /* put together the new matrix */
5020     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5021     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5022     /* Since these are PETSc arrays, change flags to free them as necessary. */
5023     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5024     mat->free_a  = PETSC_TRUE;
5025     mat->free_ij = PETSC_TRUE;
5026     mat->nonew   = 0;
5027   } else if (scall == MAT_REUSE_MATRIX) {
5028     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5029     ci = mat->i; cj = mat->j; cam = mat->a;
5030     for (i=0; i<am; i++) {
5031       /* off-diagonal portion of A */
5032       ncols_o = bi[i+1] - bi[i];
5033       for (jo=0; jo<ncols_o; jo++) {
5034         col = cmap[*bj];
5035         if (col >= cstart) break;
5036         *cam++ = *ba++; bj++;
5037       }
5038       /* diagonal portion of A */
5039       ncols_d = ai[i+1] - ai[i];
5040       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5041       /* off-diagonal portion of A */
5042       for (j=jo; j<ncols_o; j++) {
5043         *cam++ = *ba++; bj++;
5044       }
5045     }
5046   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5047   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5048   PetscFunctionReturn(0);
5049 }
5050 
5051 /*@C
5052      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5053 
5054     Not Collective
5055 
5056    Input Parameters:
5057 +    A - the matrix
5058 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5059 -    row, col - index sets of rows and columns to extract (or NULL)
5060 
5061    Output Parameter:
5062 .    A_loc - the local sequential matrix generated
5063 
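   Example usage (a minimal sketch; Aloc is an illustrative variable name), letting the routine choose the
   rows and the condensed set of nonzero columns by passing NULL index sets:
.vb
     Mat Aloc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
     MatDestroy(&Aloc);
.ve
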
5064     Level: developer
5065 
5066 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5067 
5068 @*/
5069 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5070 {
5071   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5072   PetscErrorCode ierr;
5073   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5074   IS             isrowa,iscola;
5075   Mat            *aloc;
5076   PetscBool      match;
5077 
5078   PetscFunctionBegin;
5079   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5080   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5081   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5082   if (!row) {
5083     start = A->rmap->rstart; end = A->rmap->rend;
5084     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5085   } else {
5086     isrowa = *row;
5087   }
5088   if (!col) {
5089     start = A->cmap->rstart;
5090     cmap  = a->garray;
5091     nzA   = a->A->cmap->n;
5092     nzB   = a->B->cmap->n;
5093     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5094     ncols = 0;
5095     for (i=0; i<nzB; i++) {
5096       if (cmap[i] < start) idx[ncols++] = cmap[i];
5097       else break;
5098     }
5099     imark = i;
5100     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5101     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5102     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5103   } else {
5104     iscola = *col;
5105   }
5106   if (scall != MAT_INITIAL_MATRIX) {
5107     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5108     aloc[0] = *A_loc;
5109   }
5110   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5111   if (!col) { /* attach global id of condensed columns */
5112     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5113   }
5114   *A_loc = aloc[0];
5115   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5116   if (!row) {
5117     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5118   }
5119   if (!col) {
5120     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5121   }
5122   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5123   PetscFunctionReturn(0);
5124 }
5125 
5126 /*
5127  * Destroy a mat that may be composed with PetscSF communication objects.
5128  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5129  * */
5130 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5131 {
5132   PetscSF          sf,osf;
5133   IS               map;
5134   PetscErrorCode   ierr;
5135 
5136   PetscFunctionBegin;
5137   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5138   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5139   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5140   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5141   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5142   ierr = ISDestroy(&map);CHKERRQ(ierr);
5143   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5144   PetscFunctionReturn(0);
5145 }
5146 
5147 /*
5148  * Create a sequential AIJ matrix based on row indices; all the columns of a row are extracted once that row is matched.
5149  * Rows may be local or remote. The routine is designed to be memory scalable so that nothing is based
5150  * on a global size.
5151  * */
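/* A minimal invocation sketch (names are illustrative): "rows" holds global row indices of P, which may
 * live on any rank; the routine gathers those rows into a fresh sequential matrix on this rank.
 *
 *   IS  rows;
 *   Mat P_oth = NULL;
 *   ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,&P_oth);CHKERRQ(ierr);
 */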
5152 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5153 {
5154   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5155   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5156   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5157   PetscMPIInt              owner;
5158   PetscSFNode              *iremote,*oiremote;
5159   const PetscInt           *lrowindices;
5160   PetscErrorCode           ierr;
5161   PetscSF                  sf,osf;
5162   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5163   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5164   MPI_Comm                 comm;
5165   ISLocalToGlobalMapping   mapping;
5166 
5167   PetscFunctionBegin;
5168   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5169   /* plocalsize is the number of roots
5170    * nrows is the number of leaves
5171    * */
5172   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5173   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5174   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5175   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5176   for (i=0;i<nrows;i++) {
5177     /* Find a remote index and an owner for a row
5178      * The row could be local or remote
5179      * */
5180     owner = 0;
5181     lidx  = 0;
5182     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5183     iremote[i].index = lidx;
5184     iremote[i].rank  = owner;
5185   }
5186   /* Create SF to communicate how many nonzero columns for each row */
5187   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5188    * SF will figure out the number of nonzero columns for each row, and their
5189    * offsets
5190    * */
5191   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5192   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5193   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5194 
5195   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5196   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5197   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5198   roffsets[0] = 0;
5199   roffsets[1] = 0;
5200   for (i=0;i<plocalsize;i++) {
5201     /* diag */
5202     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5203     /* off diag */
5204     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5205     /* compute offsets so that we know the relative location of each row */
5206     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5207     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5208   }
5209   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5210   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5211   /* 'r' means root, and 'l' means leaf */
5212   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5213   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5214   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5215   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5216   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5217   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5218   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5219   dntotalcols = 0;
5220   ontotalcols = 0;
5221   ncol = 0;
5222   for (i=0;i<nrows;i++) {
5223     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5224     ncol = PetscMax(pnnz[i],ncol);
5225     /* diag */
5226     dntotalcols += nlcols[i*2+0];
5227     /* off diag */
5228     ontotalcols += nlcols[i*2+1];
5229   }
5230   /* We do not need to figure out the right number of columns
5231    * since all the calculations will be done by going through the raw data
5232    * */
5233   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5234   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5235   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5236   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5237   /* diag */
5238   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5239   /* off diag */
5240   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5241   /* diag */
5242   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5243   /* off diag */
5244   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5245   dntotalcols = 0;
5246   ontotalcols = 0;
5247   ntotalcols  = 0;
5248   for (i=0;i<nrows;i++) {
5249     owner = 0;
5250     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5251     /* Set iremote for diag matrix */
5252     for (j=0;j<nlcols[i*2+0];j++) {
5253       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5254       iremote[dntotalcols].rank    = owner;
5255       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5256       ilocal[dntotalcols++]        = ntotalcols++;
5257     }
5258     /* off diag */
5259     for (j=0;j<nlcols[i*2+1];j++) {
5260       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5261       oiremote[ontotalcols].rank    = owner;
5262       oilocal[ontotalcols++]        = ntotalcols++;
5263     }
5264   }
5265   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5266   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5267   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5268   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5269   /* P serves as roots and P_oth is leaves
5270    * Diag matrix
5271    * */
5272   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5273   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5274   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5275 
5276   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5277   /* Off diag */
5278   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5279   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5280   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5281   /* We operate on the matrix internal data to save memory */
5282   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5283   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5284   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5285   /* Convert to global indices for diag matrix */
5286   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5287   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5288   /* We want P_oth to store global indices */
5289   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5290   /* Use memory scalable approach */
5291   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5292   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5293   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5294   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5295   /* Convert back to local indices */
5296   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5297   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5298   nout = 0;
5299   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5300   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5301   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5302   /* Exchange values */
5303   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5304   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5305   /* Stop PETSc from shrinking memory */
5306   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5307   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5308   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5309   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5310   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5311   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5312   /* The "new" MatDestroy (MatDestroy_SeqAIJ_PetscSF) takes care of the PetscSF objects as well */
5313   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5314   PetscFunctionReturn(0);
5315 }
5316 
5317 /*
5318  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero off-diagonal columns of local A.
5319  * This supports MPIAIJ and MAIJ.
5320  * */
5321 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5322 {
5323   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5324   Mat_SeqAIJ            *p_oth;
5325   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5326   IS                    rows,map;
5327   PetscHMapI            hamp;
5328   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5329   MPI_Comm              comm;
5330   PetscSF               sf,osf;
5331   PetscBool             has;
5332   PetscErrorCode        ierr;
5333 
5334   PetscFunctionBegin;
5335   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5336   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5337   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5338    *  and then create a submatrix (that often is an overlapping matrix)
5339    * */
5340   if (reuse==MAT_INITIAL_MATRIX) {
5341     /* Use a hash table to figure out unique keys */
5342     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5343     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5344     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5345     count = 0;
5346     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5347     for (i=0;i<a->B->cmap->n;i++) {
5348       key  = a->garray[i]/dof;
5349       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5350       if (!has) {
5351         mapping[i] = count;
5352         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5353       } else {
5354         /* The current 'i' maps to the same key as the previous one */
5355         mapping[i] = count-1;
5356       }
5357     }
5358     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5359     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5360     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5361     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5362     off = 0;
5363     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5364     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5365     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5366     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5367     /* In case the matrix was already created but the user wants to recreate it */
5368     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5369     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5370     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5371     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5372   } else if (reuse==MAT_REUSE_MATRIX) {
5373     /* If the matrix was already created, we simply update its values using the SF objects
5374      * that were attached to the matrix earlier.
5375      *  */
5376     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5377     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5378     if (!sf || !osf) {
5379       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5380     }
5381     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5382     /* Update values in place */
5383     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5384     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5385     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5386     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5387   } else {
5388     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5389   }
5390   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5391   PetscFunctionReturn(0);
5392 }
5393 
5394 /*@C
5395     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5396 
5397     Collective on Mat
5398 
5399    Input Parameters:
5400 +    A,B - the matrices in mpiaij format
5401 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5402 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5403 
5404    Output Parameter:
5405 +    rowb, colb - index sets of rows and columns of B to extract
5406 -    B_seq - the sequential matrix generated
5407 
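   Example usage (a minimal sketch; rowb, colb and Bseq are illustrative names): the first call creates the
   row/column index sets and the sequential matrix, which are then passed back for reuse.
.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
     ISDestroy(&rowb);ISDestroy(&colb);MatDestroy(&Bseq);
.ve
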
5408     Level: developer
5409 
5410 @*/
5411 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5412 {
5413   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5414   PetscErrorCode ierr;
5415   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5416   IS             isrowb,iscolb;
5417   Mat            *bseq=NULL;
5418 
5419   PetscFunctionBegin;
5420   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5421     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5422   }
5423   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5424 
5425   if (scall == MAT_INITIAL_MATRIX) {
5426     start = A->cmap->rstart;
5427     cmap  = a->garray;
5428     nzA   = a->A->cmap->n;
5429     nzB   = a->B->cmap->n;
5430     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5431     ncols = 0;
5432     for (i=0; i<nzB; i++) {  /* row < local row index */
5433       if (cmap[i] < start) idx[ncols++] = cmap[i];
5434       else break;
5435     }
5436     imark = i;
5437     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5438     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5439     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5440     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5441   } else {
5442     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5443     isrowb  = *rowb; iscolb = *colb;
5444     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5445     bseq[0] = *B_seq;
5446   }
5447   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5448   *B_seq = bseq[0];
5449   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5450   if (!rowb) {
5451     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5452   } else {
5453     *rowb = isrowb;
5454   }
5455   if (!colb) {
5456     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5457   } else {
5458     *colb = iscolb;
5459   }
5460   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5461   PetscFunctionReturn(0);
5462 }
5463 
5464 /*
5465     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5466     of the OFF-DIAGONAL portion of local A
5467 
5468     Collective on Mat
5469 
5470    Input Parameters:
5471 +    A,B - the matrices in mpiaij format
5472 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5473 
5474    Output Parameter:
5475 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5476 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5477 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5478 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5479 
5480     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5481      for this matrix. This is not desirable.
5482 
5483     Level: developer
5484 
5485 */
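/* A minimal reuse sketch (names are illustrative), following the parameter description above: the index
 * buffers returned by the MAT_INITIAL_MATRIX call are handed back on MAT_REUSE_MATRIX so that only the
 * numerical values are communicated again.
 *
 *   PetscInt  *startsj_s = NULL,*startsj_r = NULL;
 *   MatScalar *bufa = NULL;
 *   Mat       B_oth = NULL;
 *   ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
 *   ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
 */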
5486 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5487 {
5488   PetscErrorCode         ierr;
5489   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5490   Mat_SeqAIJ             *b_oth;
5491   VecScatter             ctx;
5492   MPI_Comm               comm;
5493   const PetscMPIInt      *rprocs,*sprocs;
5494   const PetscInt         *srow,*rstarts,*sstarts;
5495   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5496   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5497   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5498   MPI_Request            *rwaits = NULL,*swaits = NULL;
5499   MPI_Status             rstatus;
5500   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5501 
5502   PetscFunctionBegin;
5503   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5504   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5505 
5506   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5507     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5508   }
5509   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5510   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5511 
5512   if (size == 1) {
5513     startsj_s = NULL;
5514     bufa_ptr  = NULL;
5515     *B_oth    = NULL;
5516     PetscFunctionReturn(0);
5517   }
5518 
5519   ctx = a->Mvctx;
5520   tag = ((PetscObject)ctx)->tag;
5521 
5522   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5523   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5524   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5525   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5526   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5527   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5528   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5529 
5530   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5531   if (scall == MAT_INITIAL_MATRIX) {
5532     /* i-array */
5533     /*---------*/
5534     /*  post receives */
5535     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5536     for (i=0; i<nrecvs; i++) {
5537       rowlen = rvalues + rstarts[i]*rbs;
5538       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5539       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5540     }
5541 
5542     /* pack the outgoing message */
5543     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5544 
5545     sstartsj[0] = 0;
5546     rstartsj[0] = 0;
5547     len         = 0; /* total length of j or a array to be sent */
5548     if (nsends) {
5549       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5550       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5551     }
5552     for (i=0; i<nsends; i++) {
5553       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5554       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5555       for (j=0; j<nrows; j++) {
5556         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5557         for (l=0; l<sbs; l++) {
5558           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5559 
5560           rowlen[j*sbs+l] = ncols;
5561 
5562           len += ncols;
5563           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5564         }
5565         k++;
5566       }
5567       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5568 
5569       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5570     }
5571     /* recvs and sends of i-array are completed */
5572     i = nrecvs;
5573     while (i--) {
5574       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5575     }
5576     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5577     ierr = PetscFree(svalues);CHKERRQ(ierr);
5578 
5579     /* allocate buffers for sending j and a arrays */
5580     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5581     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5582 
5583     /* create i-array of B_oth */
5584     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5585 
5586     b_othi[0] = 0;
5587     len       = 0; /* total length of j or a array to be received */
5588     k         = 0;
5589     for (i=0; i<nrecvs; i++) {
5590       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5591       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5592       for (j=0; j<nrows; j++) {
5593         b_othi[k+1] = b_othi[k] + rowlen[j];
5594         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5595         k++;
5596       }
5597       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5598     }
5599     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5600 
5601     /* allocate space for the j and a arrays of B_oth */
5602     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5603     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5604 
5605     /* j-array */
5606     /*---------*/
5607     /*  post receives of j-array */
5608     for (i=0; i<nrecvs; i++) {
5609       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5610       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5611     }
5612 
5613     /* pack the outgoing message j-array */
5614     if (nsends) k = sstarts[0];
5615     for (i=0; i<nsends; i++) {
5616       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5617       bufJ  = bufj+sstartsj[i];
5618       for (j=0; j<nrows; j++) {
5619         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5620         for (ll=0; ll<sbs; ll++) {
5621           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5622           for (l=0; l<ncols; l++) {
5623             *bufJ++ = cols[l];
5624           }
5625           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5626         }
5627       }
5628       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5629     }
5630 
5631     /* recvs and sends of j-array are completed */
5632     i = nrecvs;
5633     while (i--) {
5634       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5635     }
5636     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5637   } else if (scall == MAT_REUSE_MATRIX) {
5638     sstartsj = *startsj_s;
5639     rstartsj = *startsj_r;
5640     bufa     = *bufa_ptr;
5641     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5642     b_otha   = b_oth->a;
5643   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
5644 
5645   /* a-array */
5646   /*---------*/
5647   /*  post receives of a-array */
5648   for (i=0; i<nrecvs; i++) {
5649     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5650     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5651   }
5652 
5653   /* pack the outgoing message a-array */
5654   if (nsends) k = sstarts[0];
5655   for (i=0; i<nsends; i++) {
5656     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5657     bufA  = bufa+sstartsj[i];
5658     for (j=0; j<nrows; j++) {
5659       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5660       for (ll=0; ll<sbs; ll++) {
5661         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5662         for (l=0; l<ncols; l++) {
5663           *bufA++ = vals[l];
5664         }
5665         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5666       }
5667     }
5668     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5669   }
5670   /* recvs and sends of a-array are completed */
5671   i = nrecvs;
5672   while (i--) {
5673     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5674   }
5675   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5676   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5677 
5678   if (scall == MAT_INITIAL_MATRIX) {
5679     /* put together the new matrix */
5680     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5681 
5682     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5683     /* Since these are PETSc arrays, change flags to free them as necessary. */
5684     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5685     b_oth->free_a  = PETSC_TRUE;
5686     b_oth->free_ij = PETSC_TRUE;
5687     b_oth->nonew   = 0;
5688 
5689     ierr = PetscFree(bufj);CHKERRQ(ierr);
5690     if (!startsj_s || !bufa_ptr) {
5691       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5692       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5693     } else {
5694       *startsj_s = sstartsj;
5695       *startsj_r = rstartsj;
5696       *bufa_ptr  = bufa;
5697     }
5698   }
5699 
5700   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5701   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5702   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5703   PetscFunctionReturn(0);
5704 }
5705 
5706 /*@C
5707   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5708 
5709   Not Collective
5710 
5711   Input Parameters:
5712 . A - The matrix in mpiaij format
5713 
5714   Output Parameter:
5715 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5716 . colmap - A map from global column index to local index into lvec
5717 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5718 
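  Example usage (a minimal sketch; the colmap declaration depends on whether PETSc was configured with ctable support):
.vb
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    Vec        lvec;
    VecScatter ctx;
    MatGetCommunicationStructs(A,&lvec,&colmap,&ctx);
.ve
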
5719   Level: developer
5720 
5721 @*/
5722 #if defined(PETSC_USE_CTABLE)
5723 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5724 #else
5725 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5726 #endif
5727 {
5728   Mat_MPIAIJ *a;
5729 
5730   PetscFunctionBegin;
5731   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5732   PetscValidPointer(lvec, 2);
5733   PetscValidPointer(colmap, 3);
5734   PetscValidPointer(multScatter, 4);
5735   a = (Mat_MPIAIJ*) A->data;
5736   if (lvec) *lvec = a->lvec;
5737   if (colmap) *colmap = a->colmap;
5738   if (multScatter) *multScatter = a->Mvctx;
5739   PetscFunctionReturn(0);
5740 }
5741 
5742 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5743 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5744 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5745 #if defined(PETSC_HAVE_MKL_SPARSE)
5746 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5747 #endif
5748 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5749 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5750 #if defined(PETSC_HAVE_ELEMENTAL)
5751 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5752 #endif
5753 #if defined(PETSC_HAVE_HYPRE)
5754 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5755 #endif
5756 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5757 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5758 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5759 
5760 /*
5761     Computes (B'*A')' since computing A*B directly is untenable
5762 
5763                n                       p                          p
5764         (              )       (              )         (                  )
5765       m (      A       )  *  n (       B      )   =   m (         C        )
5766         (              )       (              )         (                  )
5767 
5768 */
5769 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5770 {
5771   PetscErrorCode ierr;
5772   Mat            At,Bt,Ct;
5773 
5774   PetscFunctionBegin;
5775   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5776   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5777   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5778   ierr = MatDestroy(&At);CHKERRQ(ierr);
5779   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5780   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5781   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5782   PetscFunctionReturn(0);
5783 }
5784 
5785 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5786 {
5787   PetscErrorCode ierr;
5788   PetscInt       m=A->rmap->n,n=B->cmap->n;
5789 
5790   PetscFunctionBegin;
5791   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5792   ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5793   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5794   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
5795   ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
5796   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5797   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5798 
5799   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5800   PetscFunctionReturn(0);
5801 }
5802 
5803 /* ----------------------------------------------------------------*/
5804 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5805 {
5806   Mat_Product *product = C->product;
5807   Mat         A = product->A,B=product->B;
5808 
5809   PetscFunctionBegin;
5810   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5811     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5812 
5813   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5814   C->ops->productsymbolic = MatProductSymbolic_AB;
5815   PetscFunctionReturn(0);
5816 }
5817 
5818 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5819 {
5820   PetscErrorCode ierr;
5821   Mat_Product    *product = C->product;
5822 
5823   PetscFunctionBegin;
5824   if (product->type == MATPRODUCT_AB) {
5825     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5826   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]);
5827   PetscFunctionReturn(0);
5828 }
5829 /* ----------------------------------------------------------------*/
5830 
5831 /*MC
5832    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5833 
5834    Options Database Keys:
5835 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5836 
5837    Level: beginner
5838 
5839    Notes:
5840     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5841     in this case the values associated with the rows and columns one passes in are set to zero
5842     in the matrix.
5843
5844     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5845     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
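
    Example usage (a minimal sketch; m, n, M, N, dnz and onz are illustrative placeholders for the caller's
    sizes and preallocation estimates):
.vb
    MatCreate(comm,&A);
    MatSetSizes(A,m,n,M,N);
    MatSetType(A,MATMPIAIJ);
    MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);
.ve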
5846 
5847 .seealso: MatCreateAIJ()
5848 M*/
5849 
5850 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5851 {
5852   Mat_MPIAIJ     *b;
5853   PetscErrorCode ierr;
5854   PetscMPIInt    size;
5855 
5856   PetscFunctionBegin;
5857   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5858 
5859   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5860   B->data       = (void*)b;
5861   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5862   B->assembled  = PETSC_FALSE;
5863   B->insertmode = NOT_SET_VALUES;
5864   b->size       = size;
5865 
5866   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5867 
5868   /* build cache for off array entries formed */
5869   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5870 
5871   b->donotstash  = PETSC_FALSE;
5872   b->colmap      = 0;
5873   b->garray      = 0;
5874   b->roworiented = PETSC_TRUE;
5875 
5876   /* stuff used for matrix vector multiply */
5877   b->lvec  = NULL;
5878   b->Mvctx = NULL;
5879 
5880   /* stuff for MatGetRow() */
5881   b->rowindices   = 0;
5882   b->rowvalues    = 0;
5883   b->getrowactive = PETSC_FALSE;
5884 
5885   /* flexible pointer used in CUSP/CUSPARSE classes */
5886   b->spptr = NULL;
5887 
5888   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5889   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5890   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5891   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5892   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5893   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5894   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5895   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5896   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5897   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5898 #if defined(PETSC_HAVE_MKL_SPARSE)
5899   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5900 #endif
5901   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5902   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5903   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5904 #if defined(PETSC_HAVE_ELEMENTAL)
5905   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5906 #endif
5907   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5908   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5909   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5910   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5911 #if defined(PETSC_HAVE_HYPRE)
5912   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5913   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5914 #endif
5915   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5916   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5917   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5918   PetscFunctionReturn(0);
5919 }
5920 
5921 /*@C
5922      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5923          and "off-diagonal" part of the matrix in CSR format.
5924 
5925    Collective
5926 
5927    Input Parameters:
5928 +  comm - MPI communicator
5929 .  m - number of local rows (Cannot be PETSC_DECIDE)
5930 .  n - This value should be the same as the local size used in creating the
5931        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5932        calculated if N is given). For square matrices n is almost always m.
5933 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5934 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5935 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5936 .   j - column indices
5937 .   a - matrix values
5938 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5939 .   oj - column indices
5940 -   oa - matrix values
5941 
5942    Output Parameter:
5943 .   mat - the matrix
5944 
5945    Level: advanced
5946 
5947    Notes:
5948        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5949        must free the arrays once the matrix has been destroyed and not before.
5950 
5951        The i and j indices are 0 based
5952 
5953        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5954 
5955        This sets local rows and cannot be used to set off-processor values.
5956 
5957        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5958        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5959        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5960        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5961        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5962        communication if it is known that only local entries will be set.
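
       A minimal sketch of the recommended alternative (m, n, M, N, dnz, onz, rstart, rend, nc, cols and vals
       are illustrative placeholders for the caller's own sizes, row loop and per-row data):
.vb
       MatCreateAIJ(comm,m,n,M,N,dnz,NULL,onz,NULL,&A);
       MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
       MatGetOwnershipRange(A,&rstart,&rend);
       for (i=rstart; i<rend; i++) {
         MatSetValues(A,1,&i,nc,cols,vals,INSERT_VALUES);
       }
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve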
5963 
5964 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5965           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5966 @*/
5967 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5968 {
5969   PetscErrorCode ierr;
5970   Mat_MPIAIJ     *maij;
5971 
5972   PetscFunctionBegin;
5973   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5974   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5975   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5976   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5977   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5978   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5979   maij = (Mat_MPIAIJ*) (*mat)->data;
5980 
5981   (*mat)->preallocated = PETSC_TRUE;
5982 
5983   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5984   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5985 
5986   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5987   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5988 
5989   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5990   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5991   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5992   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5993 
5994   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5995   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5996   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5997   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5998   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5999   PetscFunctionReturn(0);
6000 }
6001 
6002 /*
6003     Special version for direct calls from Fortran
6004 */
6005 #include <petsc/private/fortranimpl.h>
6006 
6007 /* Change these macros so they can be used in a void function */
6008 #undef CHKERRQ
6009 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6010 #undef SETERRQ2
6011 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6012 #undef SETERRQ3
6013 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6014 #undef SETERRQ
6015 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6016 
6017 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6018 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6019 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6020 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6021 #else
6022 #endif
6023 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6024 {
6025   Mat            mat  = *mmat;
6026   PetscInt       m    = *mm, n = *mn;
6027   InsertMode     addv = *maddv;
6028   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6029   PetscScalar    value;
6030   PetscErrorCode ierr;
6031 
6032   MatCheckPreallocated(mat,1);
6033   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6034   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6035   {
6036     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6037     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6038     PetscBool roworiented = aij->roworiented;
6039 
6040     /* Some Variables required in the macro */
6041     Mat        A                    = aij->A;
6042     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6043     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6044     MatScalar  *aa                  = a->a;
6045     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6046     Mat        B                    = aij->B;
6047     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6048     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6049     MatScalar  *ba                  = b->a;
6050     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6051      * cannot use "#if defined" inside a macro. */
6052     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6053 
6054     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6055     PetscInt  nonew = a->nonew;
6056     MatScalar *ap1,*ap2;
6057 
6058     PetscFunctionBegin;
6059     for (i=0; i<m; i++) {
6060       if (im[i] < 0) continue;
6061       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6062       if (im[i] >= rstart && im[i] < rend) {
6063         row      = im[i] - rstart;
6064         lastcol1 = -1;
6065         rp1      = aj + ai[row];
6066         ap1      = aa + ai[row];
6067         rmax1    = aimax[row];
6068         nrow1    = ailen[row];
6069         low1     = 0;
6070         high1    = nrow1;
6071         lastcol2 = -1;
6072         rp2      = bj + bi[row];
6073         ap2      = ba + bi[row];
6074         rmax2    = bimax[row];
6075         nrow2    = bilen[row];
6076         low2     = 0;
6077         high2    = nrow2;
6078 
6079         for (j=0; j<n; j++) {
6080           if (roworiented) value = v[i*n+j];
6081           else value = v[i+j*m];
6082           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6083           if (in[j] >= cstart && in[j] < cend) {
6084             col = in[j] - cstart;
6085             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6086 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6087             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6088 #endif
6089           } else if (in[j] < 0) continue;
6090           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6091             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6092             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6093           } else {
6094             if (mat->was_assembled) {
6095               if (!aij->colmap) {
6096                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6097               }
6098 #if defined(PETSC_USE_CTABLE)
6099               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6100               col--;
6101 #else
6102               col = aij->colmap[in[j]] - 1;
6103 #endif
6104               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6105                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6106                 col  =  in[j];
6107                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6108                 B        = aij->B;
6109                 b        = (Mat_SeqAIJ*)B->data;
6110                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6111                 rp2      = bj + bi[row];
6112                 ap2      = ba + bi[row];
6113                 rmax2    = bimax[row];
6114                 nrow2    = bilen[row];
6115                 low2     = 0;
6116                 high2    = nrow2;
6117                 bm       = aij->B->rmap->n;
6118                 ba       = b->a;
6119                 inserted = PETSC_FALSE;
6120               }
6121             } else col = in[j];
6122             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6123 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6124             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6125 #endif
6126           }
6127         }
6128       } else if (!aij->donotstash) {
6129         if (roworiented) {
6130           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6131         } else {
6132           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6133         }
6134       }
6135     }
6136   }
6137   PetscFunctionReturnVoid();
6138 }
6139