xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 117ef88edefbfc12e7c19efe87a19a2e1b0acd4f)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
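   Example Usage:
     A minimal sketch (the per-row nonzero estimates 5 and 2 and the sizes m,n,M,N are illustrative;
     error checking omitted).  Calling both preallocation routines lets the same code run unchanged
     on one or on many MPI processes:
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
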
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
23    automatically switches over to use inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
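
    A minimal calling sketch (names are illustrative, error checking omitted); seq is the
    sequential input matrix and mlocal the number of rows this process is to own.  With
    MAT_REUSE_MATRIX only the numerical values are moved over again from process 0:

       Mat dist;
       MatDistribute_MPIAIJ(comm,seq,mlocal,MAT_INITIAL_MATRIX,&dist);
       ...
       MatDistribute_MPIAIJ(comm,seq,mlocal,MAT_REUSE_MATRIX,&dist);
       MatDestroy(&dist);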
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the diagonal and off-diagonal counts for each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the diagonal and off-diagonal counts for each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each process
425 has an order N integer array) but access is fast.
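
   The map stores (local index + 1), so a result of 0 before the decrement below (negative
   after it) means the global column has no entry in the off-diagonal block on this process.
   A lookup therefore looks like (sketch; gcol is an illustrative global column index):

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif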
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
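/*
   MatSetValues_SeqAIJ_A_Private() and its _B_ twin below insert (or add) one value at (row,col)
   of the diagonal block A (respectively the off-diagonal block B).  rp1/ap1 point at the column
   indices/values of the current row, nrow1 is the current row length, and low1/high1/lastcol1
   cache the search window so repeated insertions with increasing columns in the same row do not
   rescan from the start.  When a new nonzero must be stored and the row is full,
   MatSeqXAIJReallocateAIJ() grows the storage; (orow,ocol) are the original global indices and
   are used only in error messages.
*/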
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
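  /* v is assumed to hold the values of the entire locally owned row, ordered as: the off-diagonal
     entries whose global column lies left of the diagonal block, then the diagonal block entries,
     then the remaining off-diagonal entries to its right */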
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
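
    For example, with cstart = 2 and cend = 4, a row whose (sorted) global columns are {0, 2, 3, 5}
    is split into diagonal-part local columns {0, 1} (ailen = 2) and off-diagonal-part columns
    {0, 5}, which are kept in global numbering here (bilen = 2).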
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n; ) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled; if so we must
857      also disassemble ourselves so that we may reassemble. */
858   /*
859      if nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled thus we can skip this stuff
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = 0;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layout don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113 
1114   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1116   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1122 {
1123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135   VecScatter     Mvctx = a->Mvctx;
1136 
1137   PetscFunctionBegin;
1138   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1139   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1141   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1149   PetscErrorCode ierr;
1150 
1151   PetscFunctionBegin;
1152   /* do nondiagonal part */
1153   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1154   /* do local part */
1155   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1156   /* add partial results together */
1157   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1163 {
1164   MPI_Comm       comm;
1165   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1166   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1167   IS             Me,Notme;
1168   PetscErrorCode ierr;
1169   PetscInt       M,N,first,last,*notme,i;
1170   PetscBool      lf;
1171   PetscMPIInt    size;
1172 
1173   PetscFunctionBegin;
1174   /* Easy test: symmetric diagonal block */
1175   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1176   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1177   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1178   if (!*f) PetscFunctionReturn(0);
1179   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1181   if (size == 1) PetscFunctionReturn(0);
1182 
1183   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1184   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1185   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1186   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1187   for (i=0; i<first; i++) notme[i] = i;
1188   for (i=last; i<M; i++) notme[i-last+first] = i;
1189   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1190   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1191   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1192   Aoff = Aoffs[0];
1193   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1194   Boff = Boffs[0];
1195   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1197   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1199   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1200   ierr = PetscFree(notme);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1205 {
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1214 {
1215   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1216   PetscErrorCode ierr;
1217 
1218   PetscFunctionBegin;
1219   /* do nondiagonal part */
1220   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1221   /* do local part */
1222   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1223   /* add partial results together */
1224   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 /*
1230   This only works correctly for square matrices where the subblock A->A is the
1231    diagonal block
1232 */
1233 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1234 {
1235   PetscErrorCode ierr;
1236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1237 
1238   PetscFunctionBegin;
1239   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1240   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1241   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1246 {
1247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1252   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1257 {
1258   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262 #if defined(PETSC_USE_LOG)
1263   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1264 #endif
1265   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1266   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1268   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1269 #if defined(PETSC_USE_CTABLE)
1270   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1271 #else
1272   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1273 #endif
1274   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1275   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1276   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1277   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1278   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1279   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1280   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1281 
1282   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1283   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1284 
1285   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1295 #if defined(PETSC_HAVE_ELEMENTAL)
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1297 #endif
1298 #if defined(PETSC_HAVE_HYPRE)
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1301 #endif
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1309 {
1310   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1311   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1312   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1313   const PetscInt    *garray = aij->garray;
1314   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1315   PetscInt          *rowlens;
1316   PetscInt          *colidxs;
1317   PetscScalar       *matvals;
1318   PetscErrorCode    ierr;
1319 
1320   PetscFunctionBegin;
1321   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1322 
1323   M  = mat->rmap->N;
1324   N  = mat->cmap->N;
1325   m  = mat->rmap->n;
1326   rs = mat->rmap->rstart;
1327   cs = mat->cmap->rstart;
1328   nz = A->nz + B->nz;
1329 
1330   /* write matrix header */
1331   header[0] = MAT_FILE_CLASSID;
1332   header[1] = M; header[2] = N; header[3] = nz;
1333   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1334   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1335 
1336   /* fill in and store row lengths  */
1337   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1338   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1339   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1340   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1341 
1342   /* fill in and store column indices */
1343   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1344   for (cnt=0, i=0; i<m; i++) {
1345     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1346       if (garray[B->j[jb]] > cs) break;
1347       colidxs[cnt++] = garray[B->j[jb]];
1348     }
1349     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1350       colidxs[cnt++] = A->j[ja] + cs;
1351     for (; jb<B->i[i+1]; jb++)
1352       colidxs[cnt++] = garray[B->j[jb]];
1353   }
1354   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1355   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1356   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1357 
1358   /* fill in and store nonzero values */
1359   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1360   for (cnt=0, i=0; i<m; i++) {
1361     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1362       if (garray[B->j[jb]] > cs) break;
1363       matvals[cnt++] = B->a[jb];
1364     }
1365     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1366       matvals[cnt++] = A->a[ja];
1367     for (; jb<B->i[i+1]; jb++)
1368       matvals[cnt++] = B->a[jb];
1369   }
1370   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1371   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1372   ierr = PetscFree(matvals);CHKERRQ(ierr);
1373 
1374   /* write block size option to the viewer's .info file */
1375   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
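
/*
   The binary stream written above is: a 4-entry header (MAT_FILE_CLASSID, M, N,
   global nonzero count), the per-row nonzero counts, the global column indices
   (each row merged so its columns appear in increasing order), and finally the
   values in the same order. A minimal sketch of producing and re-reading such a
   file through the public interface, assuming an assembled matrix A (the file
   name is illustrative):

     PetscViewer viewer;
     Mat         B;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/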
1378 
1379 #include <petscdraw.h>
1380 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1381 {
1382   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1383   PetscErrorCode    ierr;
1384   PetscMPIInt       rank = aij->rank,size = aij->size;
1385   PetscBool         isdraw,iascii,isbinary;
1386   PetscViewer       sviewer;
1387   PetscViewerFormat format;
1388 
1389   PetscFunctionBegin;
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1393   if (iascii) {
1394     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1395     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1396       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1397       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1398       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1399       for (i=0; i<(PetscInt)size; i++) {
1400         nmax = PetscMax(nmax,nz[i]);
1401         nmin = PetscMin(nmin,nz[i]);
1402         navg += nz[i];
1403       }
1404       ierr = PetscFree(nz);CHKERRQ(ierr);
1405       navg = navg/size;
1406       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1407       PetscFunctionReturn(0);
1408     }
1409     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1410     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1411       MatInfo   info;
1412       PetscBool inodes;
1413 
1414       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1415       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1416       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1418       if (!inodes) {
1419         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1420                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1421       } else {
1422         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1423                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1424       }
1425       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1426       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1427       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1429       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1431       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1432       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1433       PetscFunctionReturn(0);
1434     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1435       PetscInt inodecount,inodelimit,*inodes;
1436       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1437       if (inodes) {
1438         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1439       } else {
1440         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1441       }
1442       PetscFunctionReturn(0);
1443     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1444       PetscFunctionReturn(0);
1445     }
1446   } else if (isbinary) {
1447     if (size == 1) {
1448       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1450     } else {
1451       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1452     }
1453     PetscFunctionReturn(0);
1454   } else if (iascii && size == 1) {
1455     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1456     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1457     PetscFunctionReturn(0);
1458   } else if (isdraw) {
1459     PetscDraw draw;
1460     PetscBool isnull;
1461     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1462     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1463     if (isnull) PetscFunctionReturn(0);
1464   }
1465 
1466   { /* assemble the entire matrix onto first processor */
1467     Mat A = NULL, Av;
1468     IS  isrow,iscol;
1469 
1470     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1471     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1472     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1473     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1474 /* The commented-out code below uses MatCreateSubMatrices() instead */
1475 /*
1476     Mat *AA, A = NULL, Av;
1477     IS  isrow,iscol;
1478 
1479     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1480     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1481     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1482     if (!rank) {
1483        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1484        A    = AA[0];
1485        Av   = AA[0];
1486     }
1487     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1488 */
1489     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1490     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1491     /*
1492        Everyone has to call to draw the matrix since the graphics waits are
1493        synchronized across all processors that share the PetscDraw object
1494     */
1495     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1496     if (!rank) {
1497       if (((PetscObject)mat)->name) {
1498         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1499       }
1500       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1501     }
1502     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1503     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1504     ierr = MatDestroy(&A);CHKERRQ(ierr);
1505   }
1506   PetscFunctionReturn(0);
1507 }
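
/*
   The ASCII branch above honors several viewer formats: PETSC_VIEWER_LOAD_BALANCE
   prints the min/avg/max nonzeros per process, PETSC_VIEWER_ASCII_INFO_DETAIL
   prints per-process local sizes and the VecScatter used in MatMult, and
   PETSC_VIEWER_ASCII_INFO reports whether I-node routines are used on process 0.
   A minimal sketch of requesting the detailed output, assuming an assembled
   matrix A; the same information is available from the command line with
   -mat_view ::ascii_info_detail:

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/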
1508 
1509 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1510 {
1511   PetscErrorCode ierr;
1512   PetscBool      iascii,isdraw,issocket,isbinary;
1513 
1514   PetscFunctionBegin;
1515   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1516   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1519   if (iascii || isdraw || isbinary || issocket) {
1520     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1521   }
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1526 {
1527   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1528   PetscErrorCode ierr;
1529   Vec            bb1 = 0;
1530   PetscBool      hasop;
1531 
1532   PetscFunctionBegin;
1533   if (flag == SOR_APPLY_UPPER) {
1534     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1535     PetscFunctionReturn(0);
1536   }
1537 
1538   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1539     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1540   }
1541 
1542   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1543     if (flag & SOR_ZERO_INITIAL_GUESS) {
1544       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1545       its--;
1546     }
1547 
1548     while (its--) {
1549       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1550       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551 
1552       /* update rhs: bb1 = bb - B*x */
1553       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1554       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1555 
1556       /* local sweep */
1557       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1558     }
1559   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1560     if (flag & SOR_ZERO_INITIAL_GUESS) {
1561       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1562       its--;
1563     }
1564     while (its--) {
1565       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1566       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1567 
1568       /* update rhs: bb1 = bb - B*x */
1569       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1570       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1571 
1572       /* local sweep */
1573       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1574     }
1575   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1576     if (flag & SOR_ZERO_INITIAL_GUESS) {
1577       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1578       its--;
1579     }
1580     while (its--) {
1581       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1582       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583 
1584       /* update rhs: bb1 = bb - B*x */
1585       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1586       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1587 
1588       /* local sweep */
1589       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1590     }
1591   } else if (flag & SOR_EISENSTAT) {
1592     Vec xx1;
1593 
1594     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1595     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1596 
1597     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1598     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1599     if (!mat->diag) {
1600       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1601       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1602     }
1603     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1604     if (hasop) {
1605       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1606     } else {
1607       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1608     }
1609     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1610 
1611     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1612 
1613     /* local sweep */
1614     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1615     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1616     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1617   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1618 
1619   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1620 
1621   matin->factorerrortype = mat->A->factorerrortype;
1622   PetscFunctionReturn(0);
1623 }
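
/*
   Each "local sweep" above is block Jacobi in the process dimension with SOR
   inside each block: the ghost values of xx are gathered into mat->lvec, the
   right-hand side is updated as bb1 = bb - B*x_ghost, and one SOR sweep of the
   diagonal block solves with that rhs, i.e. on process p

       x_p <- SOR(A_pp, b_p - sum_{q != p} A_pq x_q, omega).

   This kernel is normally reached through PCSOR rather than called directly; a
   minimal sketch, assuming a matrix A and conforming vectors b and x:

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);          - applies the local sweeps implemented above
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
*/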
1624 
1625 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1626 {
1627   Mat            aA,aB,Aperm;
1628   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1629   PetscScalar    *aa,*ba;
1630   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1631   PetscSF        rowsf,sf;
1632   IS             parcolp = NULL;
1633   PetscBool      done;
1634   PetscErrorCode ierr;
1635 
1636   PetscFunctionBegin;
1637   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1638   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1639   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1640   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1641 
1642   /* Invert row permutation to find out where my rows should go */
1643   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1644   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1645   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1646   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1647   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1648   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1649 
1650   /* Invert column permutation to find out where my columns should go */
1651   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1652   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1653   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1654   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1655   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1656   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1657   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1658 
1659   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1660   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1661   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1662 
1663   /* Find out where my gcols should go */
1664   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1665   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1666   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1667   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1668   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1669   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1670   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1671   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1672 
1673   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1674   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1675   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1676   for (i=0; i<m; i++) {
1677     PetscInt    row = rdest[i];
1678     PetscMPIInt rowner;
1679     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1680     for (j=ai[i]; j<ai[i+1]; j++) {
1681       PetscInt    col = cdest[aj[j]];
1682       PetscMPIInt cowner;
1683       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1684       if (rowner == cowner) dnnz[i]++;
1685       else onnz[i]++;
1686     }
1687     for (j=bi[i]; j<bi[i+1]; j++) {
1688       PetscInt    col = gcdest[bj[j]];
1689       PetscMPIInt cowner;
1690       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1691       if (rowner == cowner) dnnz[i]++;
1692       else onnz[i]++;
1693     }
1694   }
1695   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1696   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1699   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1700 
1701   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1702   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1703   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1704   for (i=0; i<m; i++) {
1705     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1706     PetscInt j0,rowlen;
1707     rowlen = ai[i+1] - ai[i];
1708     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m (the length of the repurposed acols/bcols buffers), so insert in batches of at most m */
1709       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1710       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1711     }
1712     rowlen = bi[i+1] - bi[i];
1713     for (j0=j=0; j<rowlen; j0=j) {
1714       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1715       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1716     }
1717   }
1718   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1719   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1721   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1722   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1723   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1724   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1725   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1726   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1727   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1728   *B = Aperm;
1729   PetscFunctionReturn(0);
1730 }
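
/*
   MatPermute above proceeds in three stages: the row and column permutations are
   inverted with PetscSF reductions to find where each local row/column lands, the
   diagonal/off-diagonal nonzeros of every target row are counted for
   preallocation, and the values are finally inserted with MatSetValues(). A
   minimal sketch of the public call, assuming an assembled matrix A; the stride
   index sets below form a trivial identity permutation and are only illustrative:

     PetscInt rstart,rend,cstart,cend;
     IS       rowp,colp;
     Mat      Aperm;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/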
1731 
1732 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1733 {
1734   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1739   if (ghosts) *ghosts = aij->garray;
1740   PetscFunctionReturn(0);
1741 }
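
/*
   The "ghosts" returned above are the global column indices of the off-diagonal
   block (aij->garray), i.e. exactly the off-process entries needed in a
   matrix-vector product. A minimal sketch of using them to create a ghosted work
   vector, assuming an assembled matrix A (names are illustrative):

     PetscInt       nghost,nloc;
     const PetscInt *ghosts;
     Vec            xg;
     ierr = MatGetGhosts(A,&nghost,&ghosts);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,NULL,&nloc);CHKERRQ(ierr);
     ierr = VecCreateGhost(PETSC_COMM_WORLD,nloc,PETSC_DECIDE,nghost,ghosts,&xg);CHKERRQ(ierr);
*/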
1742 
1743 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1744 {
1745   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1746   Mat            A    = mat->A,B = mat->B;
1747   PetscErrorCode ierr;
1748   PetscLogDouble isend[5],irecv[5];
1749 
1750   PetscFunctionBegin;
1751   info->block_size = 1.0;
1752   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1753 
1754   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1755   isend[3] = info->memory;  isend[4] = info->mallocs;
1756 
1757   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1758 
1759   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1760   isend[3] += info->memory;  isend[4] += info->mallocs;
1761   if (flag == MAT_LOCAL) {
1762     info->nz_used      = isend[0];
1763     info->nz_allocated = isend[1];
1764     info->nz_unneeded  = isend[2];
1765     info->memory       = isend[3];
1766     info->mallocs      = isend[4];
1767   } else if (flag == MAT_GLOBAL_MAX) {
1768     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1769 
1770     info->nz_used      = irecv[0];
1771     info->nz_allocated = irecv[1];
1772     info->nz_unneeded  = irecv[2];
1773     info->memory       = irecv[3];
1774     info->mallocs      = irecv[4];
1775   } else if (flag == MAT_GLOBAL_SUM) {
1776     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1777 
1778     info->nz_used      = irecv[0];
1779     info->nz_allocated = irecv[1];
1780     info->nz_unneeded  = irecv[2];
1781     info->memory       = irecv[3];
1782     info->mallocs      = irecv[4];
1783   }
1784   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1785   info->fill_ratio_needed = 0;
1786   info->factor_mallocs    = 0;
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1791 {
1792   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   switch (op) {
1797   case MAT_NEW_NONZERO_LOCATIONS:
1798   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1799   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1800   case MAT_KEEP_NONZERO_PATTERN:
1801   case MAT_NEW_NONZERO_LOCATION_ERR:
1802   case MAT_USE_INODES:
1803   case MAT_IGNORE_ZERO_ENTRIES:
1804     MatCheckPreallocated(A,1);
1805     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1806     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_ROW_ORIENTED:
1809     MatCheckPreallocated(A,1);
1810     a->roworiented = flg;
1811 
1812     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1813     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1814     break;
1815   case MAT_NEW_DIAGONALS:
1816   case MAT_SORTED_FULL:
1817     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1818     break;
1819   case MAT_IGNORE_OFF_PROC_ENTRIES:
1820     a->donotstash = flg;
1821     break;
1822   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1823   case MAT_SPD:
1824   case MAT_SYMMETRIC:
1825   case MAT_STRUCTURALLY_SYMMETRIC:
1826   case MAT_HERMITIAN:
1827   case MAT_SYMMETRY_ETERNAL:
1828     break;
1829   case MAT_SUBMAT_SINGLEIS:
1830     A->submat_singleis = flg;
1831     break;
1832   case MAT_STRUCTURE_ONLY:
1833     /* The option is handled directly by MatSetOption() */
1834     break;
1835   default:
1836     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1837   }
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1842 {
1843   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1844   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1845   PetscErrorCode ierr;
1846   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1847   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1848   PetscInt       *cmap,*idx_p;
1849 
1850   PetscFunctionBegin;
1851   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1852   mat->getrowactive = PETSC_TRUE;
1853 
1854   if (!mat->rowvalues && (idx || v)) {
1855     /*
1856         allocate enough space to hold information from the longest row.
1857     */
1858     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1859     PetscInt   max = 1,tmp;
1860     for (i=0; i<matin->rmap->n; i++) {
1861       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1862       if (max < tmp) max = tmp;
1863     }
1864     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1865   }
1866 
1867   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1868   lrow = row - rstart;
1869 
1870   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1871   if (!v)   {pvA = 0; pvB = 0;}
1872   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1873   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1874   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1875   nztot = nzA + nzB;
1876 
1877   cmap = mat->garray;
1878   if (v  || idx) {
1879     if (nztot) {
1880       /* Sort by increasing column numbers, assuming A and B already sorted */
1881       PetscInt imark = -1;
1882       if (v) {
1883         *v = v_p = mat->rowvalues;
1884         for (i=0; i<nzB; i++) {
1885           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1886           else break;
1887         }
1888         imark = i;
1889         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1890         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1891       }
1892       if (idx) {
1893         *idx = idx_p = mat->rowindices;
1894         if (imark > -1) {
1895           for (i=0; i<imark; i++) {
1896             idx_p[i] = cmap[cworkB[i]];
1897           }
1898         } else {
1899           for (i=0; i<nzB; i++) {
1900             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1901             else break;
1902           }
1903           imark = i;
1904         }
1905         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1906         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1907       }
1908     } else {
1909       if (idx) *idx = 0;
1910       if (v)   *v   = 0;
1911     }
1912   }
1913   *nz  = nztot;
1914   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1915   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1922 
1923   PetscFunctionBegin;
1924   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1925   aij->getrowactive = PETSC_FALSE;
1926   PetscFunctionReturn(0);
1927 }
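
/*
   MatGetRow above merges the diagonal (a->A) and off-diagonal (a->B) parts of a
   locally owned row into one work buffer sorted by global column index; only rows
   in the local ownership range may be requested, and each MatGetRow() must be
   paired with MatRestoreRow(). A minimal sketch, assuming an assembled matrix A:

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[0..ncols-1] and vals[0..ncols-1] here ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/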
1928 
1929 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1930 {
1931   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1932   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1933   PetscErrorCode ierr;
1934   PetscInt       i,j,cstart = mat->cmap->rstart;
1935   PetscReal      sum = 0.0;
1936   MatScalar      *v;
1937 
1938   PetscFunctionBegin;
1939   if (aij->size == 1) {
1940     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1941   } else {
1942     if (type == NORM_FROBENIUS) {
1943       v = amat->a;
1944       for (i=0; i<amat->nz; i++) {
1945         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1946       }
1947       v = bmat->a;
1948       for (i=0; i<bmat->nz; i++) {
1949         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1950       }
1951       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1952       *norm = PetscSqrtReal(*norm);
1953       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1954     } else if (type == NORM_1) { /* max column norm */
1955       PetscReal *tmp,*tmp2;
1956       PetscInt  *jj,*garray = aij->garray;
1957       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1958       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1959       *norm = 0.0;
1960       v     = amat->a; jj = amat->j;
1961       for (j=0; j<amat->nz; j++) {
1962         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1963       }
1964       v = bmat->a; jj = bmat->j;
1965       for (j=0; j<bmat->nz; j++) {
1966         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1967       }
1968       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1969       for (j=0; j<mat->cmap->N; j++) {
1970         if (tmp2[j] > *norm) *norm = tmp2[j];
1971       }
1972       ierr = PetscFree(tmp);CHKERRQ(ierr);
1973       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1974       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1975     } else if (type == NORM_INFINITY) { /* max row norm */
1976       PetscReal ntemp = 0.0;
1977       for (j=0; j<aij->A->rmap->n; j++) {
1978         v   = amat->a + amat->i[j];
1979         sum = 0.0;
1980         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1981           sum += PetscAbsScalar(*v); v++;
1982         }
1983         v = bmat->a + bmat->i[j];
1984         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1985           sum += PetscAbsScalar(*v); v++;
1986         }
1987         if (sum > ntemp) ntemp = sum;
1988       }
1989       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1990       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1991     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1992   }
1993   PetscFunctionReturn(0);
1994 }
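
/*
   The three supported norms are assembled from the local blocks: NORM_FROBENIUS
   sums |a_ij|^2 over a->A and a->B and adds the partial sums across processes,
   NORM_1 accumulates per-column absolute sums (off-diagonal columns mapped
   through garray) and takes the maximum after an all-reduce, and NORM_INFINITY
   takes the maximum over local row sums. A minimal usage sketch, assuming an
   assembled matrix A:

     PetscReal nrm1,nrmf,nrmi;
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);           - max column sum
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrmi);CHKERRQ(ierr);    - max row sum
*/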
1995 
1996 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1997 {
1998   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1999   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2000   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2001   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2002   PetscErrorCode  ierr;
2003   Mat             B,A_diag,*B_diag;
2004   const MatScalar *array;
2005 
2006   PetscFunctionBegin;
2007   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2008   ai = Aloc->i; aj = Aloc->j;
2009   bi = Bloc->i; bj = Bloc->j;
2010   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2011     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2012     PetscSFNode          *oloc;
2013     PETSC_UNUSED PetscSF sf;
2014 
2015     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2016     /* compute d_nnz for preallocation */
2017     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2018     for (i=0; i<ai[ma]; i++) {
2019       d_nnz[aj[i]]++;
2020     }
2021     /* compute local off-diagonal contributions */
2022     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2023     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2024     /* map those to global */
2025     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2026     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2027     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2028     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2029     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2030     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2031     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2032 
2033     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2034     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2035     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2036     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2037     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2038     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2039   } else {
2040     B    = *matout;
2041     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2042   }
2043 
2044   b           = (Mat_MPIAIJ*)B->data;
2045   A_diag      = a->A;
2046   B_diag      = &b->A;
2047   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2048   A_diag_ncol = A_diag->cmap->N;
2049   B_diag_ilen = sub_B_diag->ilen;
2050   B_diag_i    = sub_B_diag->i;
2051 
2052   /* Set ilen for diagonal of B */
2053   for (i=0; i<A_diag_ncol; i++) {
2054     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2055   }
2056 
2057   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2058      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2059   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2060 
2061   /* copy over the B part */
2062   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2063   array = Bloc->a;
2064   row   = A->rmap->rstart;
2065   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2066   cols_tmp = cols;
2067   for (i=0; i<mb; i++) {
2068     ncol = bi[i+1]-bi[i];
2069     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2070     row++;
2071     array += ncol; cols_tmp += ncol;
2072   }
2073   ierr = PetscFree(cols);CHKERRQ(ierr);
2074 
2075   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2077   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2078     *matout = B;
2079   } else {
2080     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
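
/*
   The transpose above is built in two parts: the diagonal block is transposed
   locally (no communication), while the off-diagonal entries are sent to their
   new owners through MatSetValues(); preallocation of the result comes from a
   PetscSF reduction of the off-diagonal column counts. A minimal sketch of the
   public call, assuming an assembled matrix A:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   - out of place
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);    - in place: A is overwritten by A^T
*/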
2084 
2085 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2086 {
2087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2088   Mat            a    = aij->A,b = aij->B;
2089   PetscErrorCode ierr;
2090   PetscInt       s1,s2,s3;
2091 
2092   PetscFunctionBegin;
2093   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2094   if (rr) {
2095     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2096     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2097     /* Overlap communication with computation. */
2098     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2099   }
2100   if (ll) {
2101     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2102     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2103     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2104   }
2105   /* scale  the diagonal block */
2106   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2107 
2108   if (rr) {
2109     /* Do a scatter end and then right scale the off-diagonal block */
2110     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2111     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2112   }
2113   PetscFunctionReturn(0);
2114 }
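
/*
   The scaling above computes A <- diag(ll) A diag(rr). Right-scaling the
   off-diagonal block needs the ghost entries of rr, so the forward scatter is
   started first, the diagonal block is scaled while the messages are in flight,
   and the scatter is completed before b is right-scaled. A minimal usage sketch,
   assuming an assembled matrix A (l and r are illustrative):

     Vec l,r;
     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);    - r conforms to the columns, l to the rows
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);   - A <- diag(l) A diag(r)
*/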
2115 
2116 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2117 {
2118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2119   PetscErrorCode ierr;
2120 
2121   PetscFunctionBegin;
2122   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2127 {
2128   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2129   Mat            a,b,c,d;
2130   PetscBool      flg;
2131   PetscErrorCode ierr;
2132 
2133   PetscFunctionBegin;
2134   a = matA->A; b = matA->B;
2135   c = matB->A; d = matB->B;
2136 
2137   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2138   if (flg) {
2139     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2140   }
2141   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2146 {
2147   PetscErrorCode ierr;
2148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2149   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2150 
2151   PetscFunctionBegin;
2152   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2153   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2154     /* Because of the column compression in the off-processor part of the matrix a->B,
2155        the number of columns in a->B and b->B may differ, hence we cannot call
2156        MatCopy() directly on the two parts. If need be, a copy more efficient than
2157        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2158        and then copying the submatrices. */
2159     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2160   } else {
2161     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2162     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2163   }
2164   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
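
/*
   The fast path above copies the a->A and a->B blocks directly, which is only
   valid when both matrices share the same copy implementation and nonzero
   structure (e.g. when B was obtained with MatDuplicate()); otherwise the values
   are re-inserted entry by entry through MatCopy_Basic. A minimal sketch,
   assuming an assembled matrix A:

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);   - same nonzero pattern as A
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);           - block-wise fast copy
   For unrelated matrices use DIFFERENT_NONZERO_PATTERN, which takes the basic path.
*/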
2167 
2168 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2169 {
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 /*
2178    Computes the number of nonzeros per row needed for preallocation when X and Y
2179    have different nonzero structure.
2180 */
2181 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2182 {
2183   PetscInt       i,j,k,nzx,nzy;
2184 
2185   PetscFunctionBegin;
2186   /* Set the number of nonzeros in the new matrix */
2187   for (i=0; i<m; i++) {
2188     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2189     nzx = xi[i+1] - xi[i];
2190     nzy = yi[i+1] - yi[i];
2191     nnz[i] = 0;
2192     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2193       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2194       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2195       nnz[i]++;
2196     }
2197     for (; k<nzy; k++) nnz[i]++;
2198   }
2199   PetscFunctionReturn(0);
2200 }
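
/*
   The loop above is a sorted merge of each row of X and Y in global column
   numbering. A small worked example for one row: if X has global columns
   {1,4,7} and Y has {0,4,9}, the catch-up loop counts 0, the duplicate test
   skips the shared column 4, and the trailing loop appends 9, giving
   nnz = |{0,1,4,7,9}| = 5.
*/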
2201 
2202 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2203 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2204 {
2205   PetscErrorCode ierr;
2206   PetscInt       m = Y->rmap->N;
2207   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2208   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2209 
2210   PetscFunctionBegin;
2211   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2216 {
2217   PetscErrorCode ierr;
2218   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2219   PetscBLASInt   bnz,one=1;
2220   Mat_SeqAIJ     *x,*y;
2221 
2222   PetscFunctionBegin;
2223   if (str == SAME_NONZERO_PATTERN) {
2224     PetscScalar alpha = a;
2225     x    = (Mat_SeqAIJ*)xx->A->data;
2226     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2227     y    = (Mat_SeqAIJ*)yy->A->data;
2228     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2229     x    = (Mat_SeqAIJ*)xx->B->data;
2230     y    = (Mat_SeqAIJ*)yy->B->data;
2231     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2232     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2233     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2234     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin/End(), so the matrix on the GPU
2235        will be updated */
2236 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2237     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2238       Y->offloadmask = PETSC_OFFLOAD_CPU;
2239     }
2240 #endif
2241   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2242     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2243   } else {
2244     Mat      B;
2245     PetscInt *nnz_d,*nnz_o;
2246     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2247     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2248     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2249     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2250     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2251     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2252     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2254     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2255     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2256     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2257     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2259     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2260   }
2261   PetscFunctionReturn(0);
2262 }
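
/*
   With SAME_NONZERO_PATTERN the sum above reduces to two BLAS axpy calls on the
   stored values of the diagonal and off-diagonal blocks; with
   SUBSET_NONZERO_PATTERN it falls back to MatAXPY_Basic, and otherwise a new
   matrix with the merged preallocation is built and swapped into Y. A minimal
   usage sketch, assuming assembled matrices X and Y with the same layout:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   - Y <- Y + 2 X, safest flag
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);      - cheaper when X's pattern is contained in Y's
*/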
2263 
2264 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2265 
2266 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2267 {
2268 #if defined(PETSC_USE_COMPLEX)
2269   PetscErrorCode ierr;
2270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2274   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2275 #else
2276   PetscFunctionBegin;
2277 #endif
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2282 {
2283   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2284   PetscErrorCode ierr;
2285 
2286   PetscFunctionBegin;
2287   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2288   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2293 {
2294   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2295   PetscErrorCode ierr;
2296 
2297   PetscFunctionBegin;
2298   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2299   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2304 {
2305   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2306   PetscErrorCode ierr;
2307   PetscInt       i,*idxb = 0;
2308   PetscScalar    *va,*vb;
2309   Vec            vtmp;
2310 
2311   PetscFunctionBegin;
2312   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2313   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2314   if (idx) {
2315     for (i=0; i<A->rmap->n; i++) {
2316       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2317     }
2318   }
2319 
2320   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2321   if (idx) {
2322     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2323   }
2324   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2325   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2326 
2327   for (i=0; i<A->rmap->n; i++) {
2328     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2329       va[i] = vb[i];
2330       if (idx) idx[i] = a->garray[idxb[i]];
2331     }
2332   }
2333 
2334   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2335   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2336   ierr = PetscFree(idxb);CHKERRQ(ierr);
2337   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2342 {
2343   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2344   PetscErrorCode ierr;
2345   PetscInt       i,*idxb = 0;
2346   PetscScalar    *va,*vb;
2347   Vec            vtmp;
2348 
2349   PetscFunctionBegin;
2350   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2351   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2352   if (idx) {
2353     for (i=0; i<A->rmap->n; i++) {
2354       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2355     }
2356   }
2357 
2358   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2359   if (idx) {
2360     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2361   }
2362   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2363   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2364 
2365   for (i=0; i<A->rmap->n; i++) {
2366     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2367       va[i] = vb[i];
2368       if (idx) idx[i] = a->garray[idxb[i]];
2369     }
2370   }
2371 
2372   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2373   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2374   ierr = PetscFree(idxb);CHKERRQ(ierr);
2375   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2380 {
2381   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2382   PetscInt       n      = A->rmap->n;
2383   PetscInt       cstart = A->cmap->rstart;
2384   PetscInt       *cmap  = mat->garray;
2385   PetscInt       *diagIdx, *offdiagIdx;
2386   Vec            diagV, offdiagV;
2387   PetscScalar    *a, *diagA, *offdiagA;
2388   PetscInt       r;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2393   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2394   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2395   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2396   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2397   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2398   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2399   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2400   for (r = 0; r < n; ++r) {
2401     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2402       a[r]   = diagA[r];
2403       idx[r] = cstart + diagIdx[r];
2404     } else {
2405       a[r]   = offdiagA[r];
2406       idx[r] = cmap[offdiagIdx[r]];
2407     }
2408   }
2409   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2410   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2411   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2412   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2413   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2414   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2419 {
2420   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2421   PetscInt       n      = A->rmap->n;
2422   PetscInt       cstart = A->cmap->rstart;
2423   PetscInt       *cmap  = mat->garray;
2424   PetscInt       *diagIdx, *offdiagIdx;
2425   Vec            diagV, offdiagV;
2426   PetscScalar    *a, *diagA, *offdiagA;
2427   PetscInt       r;
2428   PetscErrorCode ierr;
2429 
2430   PetscFunctionBegin;
2431   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2432   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2433   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2434   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2435   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2436   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2437   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2438   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2439   for (r = 0; r < n; ++r) {
2440     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2441       a[r]   = diagA[r];
2442       idx[r] = cstart + diagIdx[r];
2443     } else {
2444       a[r]   = offdiagA[r];
2445       idx[r] = cmap[offdiagIdx[r]];
2446     }
2447   }
2448   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2450   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2451   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2452   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2453   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2458 {
2459   PetscErrorCode ierr;
2460   Mat            *dummy;
2461 
2462   PetscFunctionBegin;
2463   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2464   *newmat = *dummy;
2465   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2470 {
2471   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2472   PetscErrorCode ierr;
2473 
2474   PetscFunctionBegin;
2475   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2476   A->factorerrortype = a->A->factorerrortype;
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2481 {
2482   PetscErrorCode ierr;
2483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2484 
2485   PetscFunctionBegin;
2486   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2487   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2488   if (x->assembled) {
2489     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2490   } else {
2491     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2492   }
2493   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2494   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2499 {
2500   PetscFunctionBegin;
2501   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2502   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 /*@
2507    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2508 
2509    Collective on Mat
2510 
2511    Input Parameters:
2512 +    A - the matrix
2513 -    sc - PETSC_TRUE to use the scalable algorithm (the default is not to use it)
2514 
2515    Level: advanced
2516 
2517 @*/
2518 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2519 {
2520   PetscErrorCode       ierr;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2524   PetscFunctionReturn(0);
2525 }
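
/*
   A minimal sketch of selecting the scalable overlap algorithm before calling
   MatIncreaseOverlap(), assuming an assembled matrix A and an array of index
   sets isarray of length nis (both illustrative); the same switch is available
   from the command line as -mat_increase_overlap_scalable:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,isarray,1);CHKERRQ(ierr);   - enlarge each index set by one level of overlap
*/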
2526 
2527 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2528 {
2529   PetscErrorCode       ierr;
2530   PetscBool            sc = PETSC_FALSE,flg;
2531 
2532   PetscFunctionBegin;
2533   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2534   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2535   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2536   if (flg) {
2537     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2538   }
2539   ierr = PetscOptionsTail();CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2544 {
2545   PetscErrorCode ierr;
2546   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2547   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2548 
2549   PetscFunctionBegin;
2550   if (!Y->preallocated) {
2551     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2552   } else if (!aij->nz) {
2553     PetscInt nonew = aij->nonew;
2554     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2555     aij->nonew = nonew;
2556   }
2557   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2562 {
2563   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2568   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2569   if (d) {
2570     PetscInt rstart;
2571     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2572     *d += rstart;
2573 
2574   }
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2579 {
2580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2581   PetscErrorCode ierr;
2582 
2583   PetscFunctionBegin;
2584   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 /* -------------------------------------------------------------------*/
2589 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2590                                        MatGetRow_MPIAIJ,
2591                                        MatRestoreRow_MPIAIJ,
2592                                        MatMult_MPIAIJ,
2593                                 /* 4*/ MatMultAdd_MPIAIJ,
2594                                        MatMultTranspose_MPIAIJ,
2595                                        MatMultTransposeAdd_MPIAIJ,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*10*/ 0,
2600                                        0,
2601                                        0,
2602                                        MatSOR_MPIAIJ,
2603                                        MatTranspose_MPIAIJ,
2604                                 /*15*/ MatGetInfo_MPIAIJ,
2605                                        MatEqual_MPIAIJ,
2606                                        MatGetDiagonal_MPIAIJ,
2607                                        MatDiagonalScale_MPIAIJ,
2608                                        MatNorm_MPIAIJ,
2609                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2610                                        MatAssemblyEnd_MPIAIJ,
2611                                        MatSetOption_MPIAIJ,
2612                                        MatZeroEntries_MPIAIJ,
2613                                 /*24*/ MatZeroRows_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*29*/ MatSetUp_MPIAIJ,
2619                                        0,
2620                                        0,
2621                                        MatGetDiagonalBlock_MPIAIJ,
2622                                        0,
2623                                 /*34*/ MatDuplicate_MPIAIJ,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                 /*39*/ MatAXPY_MPIAIJ,
2629                                        MatCreateSubMatrices_MPIAIJ,
2630                                        MatIncreaseOverlap_MPIAIJ,
2631                                        MatGetValues_MPIAIJ,
2632                                        MatCopy_MPIAIJ,
2633                                 /*44*/ MatGetRowMax_MPIAIJ,
2634                                        MatScale_MPIAIJ,
2635                                        MatShift_MPIAIJ,
2636                                        MatDiagonalSet_MPIAIJ,
2637                                        MatZeroRowsColumns_MPIAIJ,
2638                                 /*49*/ MatSetRandom_MPIAIJ,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2644                                        0,
2645                                        MatSetUnfactored_MPIAIJ,
2646                                        MatPermute_MPIAIJ,
2647                                        0,
2648                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2649                                        MatDestroy_MPIAIJ,
2650                                        MatView_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                 /*64*/ 0,
2654                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2659                                        MatGetRowMinAbs_MPIAIJ,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*75*/ MatFDColoringApply_AIJ,
2665                                        MatSetFromOptions_MPIAIJ,
2666                                        0,
2667                                        0,
2668                                        MatFindZeroDiagonals_MPIAIJ,
2669                                 /*80*/ 0,
2670                                        0,
2671                                        0,
2672                                 /*83*/ MatLoad_MPIAIJ,
2673                                        MatIsSymmetric_MPIAIJ,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                 /*89*/ 0,
2679                                        0,
2680                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2681                                        0,
2682                                        0,
2683                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        MatBindToCPU_MPIAIJ,
2688                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2689                                        0,
2690                                        0,
2691                                        MatConjugate_MPIAIJ,
2692                                        0,
2693                                 /*104*/MatSetValuesRow_MPIAIJ,
2694                                        MatRealPart_MPIAIJ,
2695                                        MatImaginaryPart_MPIAIJ,
2696                                        0,
2697                                        0,
2698                                 /*109*/0,
2699                                        0,
2700                                        MatGetRowMin_MPIAIJ,
2701                                        0,
2702                                        MatMissingDiagonal_MPIAIJ,
2703                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2704                                        0,
2705                                        MatGetGhosts_MPIAIJ,
2706                                        0,
2707                                        0,
2708                                 /*119*/0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        MatGetMultiProcBlock_MPIAIJ,
2713                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2714                                        MatGetColumnNorms_MPIAIJ,
2715                                        MatInvertBlockDiagonal_MPIAIJ,
2716                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2717                                        MatCreateSubMatricesMPI_MPIAIJ,
2718                                 /*129*/0,
2719                                        0,
2720                                        0,
2721                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2722                                        0,
2723                                 /*134*/0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                 /*139*/MatSetBlockSizes_MPIAIJ,
2729                                        0,
2730                                        0,
2731                                        MatFDColoringSetUp_MPIXAIJ,
2732                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2733                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2734                                 /*145*/0,
2735                                        0,
2736                                        0
2737 };
2738 
2739 /* ----------------------------------------------------------------------------------------*/
2740 
2741 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2742 {
2743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2748   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2753 {
2754   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2759   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2764 {
2765   Mat_MPIAIJ     *b;
2766   PetscErrorCode ierr;
2767   PetscMPIInt    size;
2768 
2769   PetscFunctionBegin;
2770   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2771   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2772   b = (Mat_MPIAIJ*)B->data;
2773 
2774 #if defined(PETSC_USE_CTABLE)
2775   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2776 #else
2777   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2778 #endif
2779   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2780   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2781   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2782 
2783   /* Because B will have been resized we simply destroy it and create a new one each time */
2784   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2785   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2786   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2787   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2788   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2789   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2790   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2791 
2792   if (!B->preallocated) {
2793     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2794     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2795     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2796     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2797     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2798   }
2799 
2800   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2801   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2802   B->preallocated  = PETSC_TRUE;
2803   B->was_assembled = PETSC_FALSE;
2804   B->assembled     = PETSC_FALSE;
2805   PetscFunctionReturn(0);
2806 }
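
/*
   A minimal caller-side sketch of the preallocation handled by the routine above, for a tridiagonal
   matrix distributed by rows (the names A and N are illustrative; exact counts are application dependent):

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,NULL,1,NULL);CHKERRQ(ierr);

   Here 3 entries per row are preallocated in the diagonal block (stored in b->A above) and 1 entry per
   row in the off-diagonal block (b->B); per-row arrays d_nnz and o_nnz may be passed instead of the
   scalar estimates.
*/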
2807 
2808 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2809 {
2810   Mat_MPIAIJ     *b;
2811   PetscErrorCode ierr;
2812 
2813   PetscFunctionBegin;
2814   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2815   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2816   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2817   b = (Mat_MPIAIJ*)B->data;
2818 
2819 #if defined(PETSC_USE_CTABLE)
2820   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2821 #else
2822   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2823 #endif
2824   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2825   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2826   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2827 
2828   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2829   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2830   B->preallocated  = PETSC_TRUE;
2831   B->was_assembled = PETSC_FALSE;
2832   B->assembled = PETSC_FALSE;
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2837 {
2838   Mat            mat;
2839   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2840   PetscErrorCode ierr;
2841 
2842   PetscFunctionBegin;
2843   *newmat = 0;
2844   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2845   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2846   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2847   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2848   a       = (Mat_MPIAIJ*)mat->data;
2849 
2850   mat->factortype   = matin->factortype;
2851   mat->assembled    = matin->assembled;
2852   mat->insertmode   = NOT_SET_VALUES;
2853   mat->preallocated = matin->preallocated;
2854 
2855   a->size         = oldmat->size;
2856   a->rank         = oldmat->rank;
2857   a->donotstash   = oldmat->donotstash;
2858   a->roworiented  = oldmat->roworiented;
2859   a->rowindices   = NULL;
2860   a->rowvalues    = NULL;
2861   a->getrowactive = PETSC_FALSE;
2862 
2863   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2864   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2865 
2866   if (oldmat->colmap) {
2867 #if defined(PETSC_USE_CTABLE)
2868     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2869 #else
2870     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2871     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2872     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2873 #endif
2874   } else a->colmap = NULL;
2875   if (oldmat->garray) {
2876     PetscInt len;
2877     len  = oldmat->B->cmap->n;
2878     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2879     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2880     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2881   } else a->garray = NULL;
2882 
2883   /* It may happen that MatDuplicate is called with a non-assembled matrix;
2884      in fact, MatDuplicate only requires the matrix to be preallocated.
2885      This can happen, for example, inside a DMCreateMatrix_Shell */
2886   if (oldmat->lvec) {
2887     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2888     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2889   }
2890   if (oldmat->Mvctx) {
2891     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2892     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2893   }
2894   if (oldmat->Mvctx_mpi1) {
2895     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2896     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2897   }
2898 
2899   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2900   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2901   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2902   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2903   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2904   *newmat = mat;
2905   PetscFunctionReturn(0);
2906 }
2907 
2908 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2909 {
2910   PetscBool      isbinary, ishdf5;
2911   PetscErrorCode ierr;
2912 
2913   PetscFunctionBegin;
2914   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2915   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2916   /* force binary viewer to load .info file if it has not yet done so */
2917   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2918   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2919   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2920   if (isbinary) {
2921     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2922   } else if (ishdf5) {
2923 #if defined(PETSC_HAVE_HDF5)
2924     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2925 #else
2926     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2927 #endif
2928   } else {
2929     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2930   }
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2935 {
2936   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2937   PetscInt       *rowidxs,*colidxs;
2938   PetscScalar    *matvals;
2939   PetscErrorCode ierr;
2940 
2941   PetscFunctionBegin;
2942   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2943 
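  /* The binary file read below consists of a four-entry header (class id, global number of rows, global
     number of columns, total number of nonzeros), followed by the row lengths, the column indices, and
     finally the numerical values */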
2944   /* read in matrix header */
2945   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2946   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2947   M  = header[1]; N = header[2]; nz = header[3];
2948   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2949   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2950   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2951 
2952   /* set block sizes from the viewer's .info file */
2953   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2954   /* set global sizes if not set already */
2955   if (mat->rmap->N < 0) mat->rmap->N = M;
2956   if (mat->cmap->N < 0) mat->cmap->N = N;
2957   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2958   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2959 
2960   /* check if the matrix sizes are correct */
2961   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2962   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2963 
2964   /* read in row lengths and build row indices */
2965   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2966   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2967   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
2968   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2969   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2970   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum of row lengths = %D",nz,sum);
2971   /* read in column indices and matrix values */
2972   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2973   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2974   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2975   /* store matrix indices and values */
2976   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2977   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2978   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2979   PetscFunctionReturn(0);
2980 }
2981 
2982 /* Not scalable because of ISAllGather() unless getting all columns. */
2983 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2984 {
2985   PetscErrorCode ierr;
2986   IS             iscol_local;
2987   PetscBool      isstride;
2988   PetscMPIInt    lisstride=0,gisstride;
2989 
2990   PetscFunctionBegin;
2991   /* check if we are grabbing all columns */
2992   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2993 
2994   if (isstride) {
2995     PetscInt  start,len,mstart,mlen;
2996     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2997     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2998     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2999     if (mstart == start && mlen-mstart == len) lisstride = 1;
3000   }
3001 
3002   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3003   if (gisstride) {
3004     PetscInt N;
3005     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3006     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3007     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3008     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3009   } else {
3010     PetscInt cbs;
3011     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3012     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3013     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3014   }
3015 
3016   *isseq = iscol_local;
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 /*
3021  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3022  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3023 
3024  Input Parameters:
3025    mat - matrix
3026    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3027            i.e., mat->rstart <= isrow[i] < mat->rend
3028    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3029            i.e., mat->cstart <= iscol[i] < mat->cend
3030  Output Parameters:
3031    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3032    iscol_o - sequential column index set for retrieving mat->B
3033    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3034  */
3035 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3036 {
3037   PetscErrorCode ierr;
3038   Vec            x,cmap;
3039   const PetscInt *is_idx;
3040   PetscScalar    *xarray,*cmaparray;
3041   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3042   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3043   Mat            B=a->B;
3044   Vec            lvec=a->lvec,lcmap;
3045   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3046   MPI_Comm       comm;
3047   VecScatter     Mvctx=a->Mvctx;
3048 
3049   PetscFunctionBegin;
3050   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3051   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3052 
3053   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3054   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3055   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3056   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3057   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3058 
3059   /* Get start indices */
3060   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3061   isstart -= ncols;
3062   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3063 
3064   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3065   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3066   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3067   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3068   for (i=0; i<ncols; i++) {
3069     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3070     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3071     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3072   }
3073   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3074   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3075   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3076 
3077   /* Get iscol_d */
3078   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3079   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3080   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3081 
3082   /* Get isrow_d */
3083   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3084   rstart = mat->rmap->rstart;
3085   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3086   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3087   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3088   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3089 
3090   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3091   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3092   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3093 
3094   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3095   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3096   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3097 
3098   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3099 
3100   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3101   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3102 
3103   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3104   /* off-process column indices */
3105   count = 0;
3106   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3107   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3108 
3109   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3110   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3111   for (i=0; i<Bn; i++) {
3112     if (PetscRealPart(xarray[i]) > -1.0) {
3113       idx[count]     = i;                   /* local column index in off-diagonal part B */
3114       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3115       count++;
3116     }
3117   }
3118   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3119   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3120 
3121   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3122   /* cannot ensure iscol_o has same blocksize as iscol! */
3123 
3124   ierr = PetscFree(idx);CHKERRQ(ierr);
3125   *garray = cmap1;
3126 
3127   ierr = VecDestroy(&x);CHKERRQ(ierr);
3128   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3129   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3134 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3135 {
3136   PetscErrorCode ierr;
3137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3138   Mat            M = NULL;
3139   MPI_Comm       comm;
3140   IS             iscol_d,isrow_d,iscol_o;
3141   Mat            Asub = NULL,Bsub = NULL;
3142   PetscInt       n;
3143 
3144   PetscFunctionBegin;
3145   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3146 
3147   if (call == MAT_REUSE_MATRIX) {
3148     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3149     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3150     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3151 
3152     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3153     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3154 
3155     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3156     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3157 
3158     /* Update diagonal and off-diagonal portions of submat */
3159     asub = (Mat_MPIAIJ*)(*submat)->data;
3160     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3161     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3162     if (n) {
3163       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3164     }
3165     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3166     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3167 
3168   } else { /* call == MAT_INITIAL_MATRIX */
3169     const PetscInt *garray;
3170     PetscInt        BsubN;
3171 
3172     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3173     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3174 
3175     /* Create local submatrices Asub and Bsub */
3176     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3177     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3178 
3179     /* Create submatrix M */
3180     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3181 
3182     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3183     asub = (Mat_MPIAIJ*)M->data;
3184 
3185     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3186     n = asub->B->cmap->N;
3187     if (BsubN > n) {
3188       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3189       const PetscInt *idx;
3190       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3191       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3192 
3193       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3194       j = 0;
3195       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3196       for (i=0; i<n; i++) {
3197         if (j >= BsubN) break;
3198         while (subgarray[i] > garray[j]) j++;
3199 
3200         if (subgarray[i] == garray[j]) {
3201           idx_new[i] = idx[j++];
3202         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3203       }
3204       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3205 
3206       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3207       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3208 
3209     } else if (BsubN < n) {
3210       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Number of columns of Bsub %D cannot be smaller than that of B %D",BsubN,asub->B->cmap->N);
3211     }
3212 
3213     ierr = PetscFree(garray);CHKERRQ(ierr);
3214     *submat = M;
3215 
3216     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3217     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3218     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3219 
3220     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3221     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3222 
3223     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3224     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3225   }
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3230 {
3231   PetscErrorCode ierr;
3232   IS             iscol_local=NULL,isrow_d;
3233   PetscInt       csize;
3234   PetscInt       n,i,j,start,end;
3235   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3236   MPI_Comm       comm;
3237 
3238   PetscFunctionBegin;
3239   /* If isrow has same processor distribution as mat,
3240      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3241   if (call == MAT_REUSE_MATRIX) {
3242     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3243     if (isrow_d) {
3244       sameRowDist  = PETSC_TRUE;
3245       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3246     } else {
3247       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3248       if (iscol_local) {
3249         sameRowDist  = PETSC_TRUE;
3250         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3251       }
3252     }
3253   } else {
3254     /* Check if isrow has same processor distribution as mat */
3255     sameDist[0] = PETSC_FALSE;
3256     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3257     if (!n) {
3258       sameDist[0] = PETSC_TRUE;
3259     } else {
3260       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3261       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3262       if (i >= start && j < end) {
3263         sameDist[0] = PETSC_TRUE;
3264       }
3265     }
3266 
3267     /* Check if iscol has same processor distribution as mat */
3268     sameDist[1] = PETSC_FALSE;
3269     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3270     if (!n) {
3271       sameDist[1] = PETSC_TRUE;
3272     } else {
3273       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3274       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3275       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3276     }
3277 
3278     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3279     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3280     sameRowDist = tsameDist[0];
3281   }
3282 
3283   if (sameRowDist) {
3284     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3285       /* isrow and iscol have same processor distribution as mat */
3286       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3287       PetscFunctionReturn(0);
3288     } else { /* sameRowDist */
3289       /* isrow has same processor distribution as mat */
3290       if (call == MAT_INITIAL_MATRIX) {
3291         PetscBool sorted;
3292         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3293         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3294         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3295         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3296 
3297         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3298         if (sorted) {
3299           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3300           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3301           PetscFunctionReturn(0);
3302         }
3303       } else { /* call == MAT_REUSE_MATRIX */
3304         IS    iscol_sub;
3305         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3306         if (iscol_sub) {
3307           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3308           PetscFunctionReturn(0);
3309         }
3310       }
3311     }
3312   }
3313 
3314   /* General case: iscol -> iscol_local which has global size of iscol */
3315   if (call == MAT_REUSE_MATRIX) {
3316     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3317     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3318   } else {
3319     if (!iscol_local) {
3320       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3321     }
3322   }
3323 
3324   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3325   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3326 
3327   if (call == MAT_INITIAL_MATRIX) {
3328     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3329     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3330   }
3331   PetscFunctionReturn(0);
3332 }
3333 
3334 /*@C
3335      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3336          and "off-diagonal" part of the matrix in CSR format.
3337 
3338    Collective
3339 
3340    Input Parameters:
3341 +  comm - MPI communicator
3342 .  A - "diagonal" portion of matrix
3343 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3344 -  garray - global index of B columns
3345 
3346    Output Parameter:
3347 .   mat - the matrix, with input A as its local diagonal matrix
3348    Level: advanced
3349 
3350    Notes:
3351        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3352        A becomes part of the output mat and B is destroyed by this routine; the caller must not use A or B afterwards.
3353 
3354 .seealso: MatCreateMPIAIJWithSplitArrays()
3355 @*/
3356 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3357 {
3358   PetscErrorCode ierr;
3359   Mat_MPIAIJ     *maij;
3360   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3361   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3362   PetscScalar    *oa=b->a;
3363   Mat            Bnew;
3364   PetscInt       m,n,N;
3365 
3366   PetscFunctionBegin;
3367   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3368   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3369   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3370   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3371   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3372   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3373 
3374   /* Get global columns of mat */
3375   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3376 
3377   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3378   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3379   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3380   maij = (Mat_MPIAIJ*)(*mat)->data;
3381 
3382   (*mat)->preallocated = PETSC_TRUE;
3383 
3384   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3385   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3386 
3387   /* Set A as diagonal portion of *mat */
3388   maij->A = A;
3389 
3390   nz = oi[m];
3391   for (i=0; i<nz; i++) {
3392     col   = oj[i];
3393     oj[i] = garray[col];
3394   }
3395 
3396    /* Set Bnew as off-diagonal portion of *mat */
3397   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3398   bnew        = (Mat_SeqAIJ*)Bnew->data;
3399   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3400   maij->B     = Bnew;
3401 
3402   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3403 
3404   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3405   b->free_a       = PETSC_FALSE;
3406   b->free_ij      = PETSC_FALSE;
3407   ierr = MatDestroy(&B);CHKERRQ(ierr);
3408 
3409   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3410   bnew->free_a       = PETSC_TRUE;
3411   bnew->free_ij      = PETSC_TRUE;
3412 
3413   /* condense columns of maij->B */
3414   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3415   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3416   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3417   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3418   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3419   PetscFunctionReturn(0);
3420 }
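
/*
   A minimal sketch of how the routine above is used by MatCreateSubMatrix_MPIAIJ_SameRowColDist():
   Asub and Bsub are sequential AIJ matrices holding the diagonal and (column-compressed) off-diagonal
   blocks, and garray[] maps the columns of Bsub to global column indices; all names are taken from that
   caller:

     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

   After the call Asub is owned by M and Bsub has been destroyed, so the caller must not use either again.
*/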
3421 
3422 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3423 
3424 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3425 {
3426   PetscErrorCode ierr;
3427   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3428   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3429   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3430   Mat            M,Msub,B=a->B;
3431   MatScalar      *aa;
3432   Mat_SeqAIJ     *aij;
3433   PetscInt       *garray = a->garray,*colsub,Ncols;
3434   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3435   IS             iscol_sub,iscmap;
3436   const PetscInt *is_idx,*cmap;
3437   PetscBool      allcolumns=PETSC_FALSE;
3438   MPI_Comm       comm;
3439 
3440   PetscFunctionBegin;
3441   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3442 
3443   if (call == MAT_REUSE_MATRIX) {
3444     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3445     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3446     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3447 
3448     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3449     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3450 
3451     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3452     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3453 
3454     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3455 
3456   } else { /* call == MAT_INITIAL_MATRIX */
3457     PetscBool flg;
3458 
3459     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3460     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3461 
3462     /* (1) iscol -> nonscalable iscol_local */
3463     /* Check for special case: each processor gets entire matrix columns */
3464     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3465     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3466     if (allcolumns) {
3467       iscol_sub = iscol_local;
3468       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3469       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3470 
3471     } else {
3472       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3473       PetscInt *idx,*cmap1,k;
3474       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3475       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3476       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3477       count = 0;
3478       k     = 0;
3479       for (i=0; i<Ncols; i++) {
3480         j = is_idx[i];
3481         if (j >= cstart && j < cend) {
3482           /* diagonal part of mat */
3483           idx[count]     = j;
3484           cmap1[count++] = i; /* column index in submat */
3485         } else if (Bn) {
3486           /* off-diagonal part of mat */
3487           if (j == garray[k]) {
3488             idx[count]     = j;
3489             cmap1[count++] = i;  /* column index in submat */
3490           } else if (j > garray[k]) {
3491             while (j > garray[k] && k < Bn-1) k++;
3492             if (j == garray[k]) {
3493               idx[count]     = j;
3494               cmap1[count++] = i; /* column index in submat */
3495             }
3496           }
3497         }
3498       }
3499       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3500 
3501       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3502       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3503       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3504 
3505       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3506     }
3507 
3508     /* (3) Create sequential Msub */
3509     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3510   }
3511 
3512   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3513   aij  = (Mat_SeqAIJ*)(Msub)->data;
3514   ii   = aij->i;
3515   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3516 
3517   /*
3518       m - number of local rows
3519       Ncols - number of columns (same on all processors)
3520       rstart - first row in new global matrix generated
3521   */
3522   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3523 
3524   if (call == MAT_INITIAL_MATRIX) {
3525     /* (4) Create parallel newmat */
3526     PetscMPIInt    rank,size;
3527     PetscInt       csize;
3528 
3529     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3530     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3531 
3532     /*
3533         Determine the number of non-zeros in the diagonal and off-diagonal
3534         portions of the matrix in order to do correct preallocation
3535     */
3536 
3537     /* first get start and end of "diagonal" columns */
3538     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3539     if (csize == PETSC_DECIDE) {
3540       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3541       if (mglobal == Ncols) { /* square matrix */
3542         nlocal = m;
3543       } else {
3544         nlocal = Ncols/size + ((Ncols % size) > rank);
3545       }
3546     } else {
3547       nlocal = csize;
3548     }
3549     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3550     rstart = rend - nlocal;
3551     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3552 
3553     /* next, compute all the lengths */
3554     jj    = aij->j;
3555     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3556     olens = dlens + m;
3557     for (i=0; i<m; i++) {
3558       jend = ii[i+1] - ii[i];
3559       olen = 0;
3560       dlen = 0;
3561       for (j=0; j<jend; j++) {
3562         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3563         else dlen++;
3564         jj++;
3565       }
3566       olens[i] = olen;
3567       dlens[i] = dlen;
3568     }
3569 
3570     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3571     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3572 
3573     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3574     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3575     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3576     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3577     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3578     ierr = PetscFree(dlens);CHKERRQ(ierr);
3579 
3580   } else { /* call == MAT_REUSE_MATRIX */
3581     M    = *newmat;
3582     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3583     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3584     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3585     /*
3586          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3587        rather than the slower MatSetValues().
3588     */
3589     M->was_assembled = PETSC_TRUE;
3590     M->assembled     = PETSC_FALSE;
3591   }
3592 
3593   /* (5) Set values of Msub to *newmat */
3594   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3595   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3596 
3597   jj   = aij->j;
3598   aa   = aij->a;
3599   for (i=0; i<m; i++) {
3600     row = rstart + i;
3601     nz  = ii[i+1] - ii[i];
3602     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3603     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3604     jj += nz; aa += nz;
3605   }
3606   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3607 
3608   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3609   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610 
3611   ierr = PetscFree(colsub);CHKERRQ(ierr);
3612 
3613   /* save Msub, iscol_sub and iscmap used in processor for next request */
3614   if (call ==  MAT_INITIAL_MATRIX) {
3615     *newmat = M;
3616     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3617     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3618 
3619     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3620     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3621 
3622     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3623     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3624 
3625     if (iscol_local) {
3626       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3627       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3628     }
3629   }
3630   PetscFunctionReturn(0);
3631 }
3632 
3633 /*
3634     Not great since it makes two copies of the submatrix: first a SeqAIJ
3635   on each process, then the final result obtained by concatenating the local matrices.
3636   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3637 
3638   Note: This requires a sequential iscol with all indices.
3639 */
3640 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3641 {
3642   PetscErrorCode ierr;
3643   PetscMPIInt    rank,size;
3644   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3645   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3646   Mat            M,Mreuse;
3647   MatScalar      *aa,*vwork;
3648   MPI_Comm       comm;
3649   Mat_SeqAIJ     *aij;
3650   PetscBool      colflag,allcolumns=PETSC_FALSE;
3651 
3652   PetscFunctionBegin;
3653   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3654   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3655   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3656 
3657   /* Check for special case: each processor gets entire matrix columns */
3658   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3659   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3660   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3661 
3662   if (call ==  MAT_REUSE_MATRIX) {
3663     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3664     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3665     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3666   } else {
3667     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3668   }
3669 
3670   /*
3671       m - number of local rows
3672       n - number of columns (same on all processors)
3673       rstart - first row in new global matrix generated
3674   */
3675   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3676   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3677   if (call == MAT_INITIAL_MATRIX) {
3678     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3679     ii  = aij->i;
3680     jj  = aij->j;
3681 
3682     /*
3683         Determine the number of non-zeros in the diagonal and off-diagonal
3684         portions of the matrix in order to do correct preallocation
3685     */
3686 
3687     /* first get start and end of "diagonal" columns */
3688     if (csize == PETSC_DECIDE) {
3689       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3690       if (mglobal == n) { /* square matrix */
3691         nlocal = m;
3692       } else {
3693         nlocal = n/size + ((n % size) > rank);
3694       }
3695     } else {
3696       nlocal = csize;
3697     }
3698     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3699     rstart = rend - nlocal;
3700     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3701 
3702     /* next, compute all the lengths */
3703     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3704     olens = dlens + m;
3705     for (i=0; i<m; i++) {
3706       jend = ii[i+1] - ii[i];
3707       olen = 0;
3708       dlen = 0;
3709       for (j=0; j<jend; j++) {
3710         if (*jj < rstart || *jj >= rend) olen++;
3711         else dlen++;
3712         jj++;
3713       }
3714       olens[i] = olen;
3715       dlens[i] = dlen;
3716     }
3717     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3718     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3719     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3720     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3721     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3722     ierr = PetscFree(dlens);CHKERRQ(ierr);
3723   } else {
3724     PetscInt ml,nl;
3725 
3726     M    = *newmat;
3727     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3728     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3729     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3730     /*
3731          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3732        rather than the slower MatSetValues().
3733     */
3734     M->was_assembled = PETSC_TRUE;
3735     M->assembled     = PETSC_FALSE;
3736   }
3737   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3738   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3739   ii   = aij->i;
3740   jj   = aij->j;
3741   aa   = aij->a;
3742   for (i=0; i<m; i++) {
3743     row   = rstart + i;
3744     nz    = ii[i+1] - ii[i];
3745     cwork = jj;     jj += nz;
3746     vwork = aa;     aa += nz;
3747     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3748   }
3749 
3750   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3751   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752   *newmat = M;
3753 
3754   /* save submatrix used in processor for next request */
3755   if (call ==  MAT_INITIAL_MATRIX) {
3756     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3757     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3758   }
3759   PetscFunctionReturn(0);
3760 }
3761 
3762 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3763 {
3764   PetscInt       m,cstart, cend,j,nnz,i,d;
3765   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3766   const PetscInt *JJ;
3767   PetscErrorCode ierr;
3768   PetscBool      nooffprocentries;
3769 
3770   PetscFunctionBegin;
3771   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 but it is %D",Ii[0]);
3772 
3773   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3774   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3775   m      = B->rmap->n;
3776   cstart = B->cmap->rstart;
3777   cend   = B->cmap->rend;
3778   rstart = B->rmap->rstart;
3779 
3780   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3781 
3782   if (PetscDefined(USE_DEBUG)) {
3783     for (i=0; i<m; i++) {
3784       nnz = Ii[i+1]- Ii[i];
3785       JJ  = J + Ii[i];
3786       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3787       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3788       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3789     }
3790   }
3791 
3792   for (i=0; i<m; i++) {
3793     nnz     = Ii[i+1]- Ii[i];
3794     JJ      = J + Ii[i];
3795     nnz_max = PetscMax(nnz_max,nnz);
3796     d       = 0;
3797     for (j=0; j<nnz; j++) {
3798       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3799     }
3800     d_nnz[i] = d;
3801     o_nnz[i] = nnz - d;
3802   }
3803   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3804   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3805 
3806   for (i=0; i<m; i++) {
3807     ii   = i + rstart;
3808     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3809   }
3810   nooffprocentries    = B->nooffprocentries;
3811   B->nooffprocentries = PETSC_TRUE;
3812   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3813   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3814   B->nooffprocentries = nooffprocentries;
3815 
3816   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3817   PetscFunctionReturn(0);
3818 }
3819 
3820 /*@
3821    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3822    (the default parallel PETSc format).
3823 
3824    Collective
3825 
3826    Input Parameters:
3827 +  B - the matrix
3828 .  i - the indices into j for the start of each local row (starts with zero)
3829 .  j - the column indices for each local row (starts with zero)
3830 -  v - optional values in the matrix
3831 
3832    Level: developer
3833 
3834    Notes:
3835        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3836      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3837      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3838 
3839        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3840 
3841        The format which is used for the sparse matrix input, is equivalent to a
3842        The format used for the sparse matrix input is equivalent to a
3843     row-major ordering, i.e., for the following matrix, the input data expected is
3844     as shown below.
3845 $        1 0 0
3846 $        2 0 3     P0
3847 $       -------
3848 $        4 5 6     P1
3849 $
3850 $     Process0 [P0]: rows_owned=[0,1]
3851 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3852 $        j =  {0,0,2}  [size = 3]
3853 $        v =  {1,2,3}  [size = 3]
3854 $
3855 $     Process1 [P1]: rows_owned=[2]
3856 $        i =  {0,3}    [size = nrow+1  = 1+1]
3857 $        j =  {0,1,2}  [size = 3]
3858 $        v =  {4,5,6}  [size = 3]
3859 
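     A minimal calling sequence on process P0 of the example above might look as follows (a sketch only; error
     checking with CHKERRQ() is omitted and comm, i, j, v are assumed to be set up as described above):
.vb
      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,2,PETSC_DECIDE,3,3);      /* P0 owns 2 local rows of the 3x3 example matrix */
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
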
3860 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3861           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3862 @*/
3863 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3864 {
3865   PetscErrorCode ierr;
3866 
3867   PetscFunctionBegin;
3868   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3869   PetscFunctionReturn(0);
3870 }
3871 
3872 /*@C
3873    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3874    (the default parallel PETSc format).  For good matrix assembly performance
3875    the user should preallocate the matrix storage by setting the parameters
3876    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3877    performance can be increased by more than a factor of 50.
3878 
3879    Collective
3880 
3881    Input Parameters:
3882 +  B - the matrix
3883 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3884            (same value is used for all local rows)
3885 .  d_nnz - array containing the number of nonzeros in the various rows of the
3886            DIAGONAL portion of the local submatrix (possibly different for each row)
3887            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3888            The size of this array is equal to the number of local rows, i.e 'm'.
3889            For matrices that will be factored, you must leave room for (and set)
3890            the diagonal entry even if it is zero.
3891 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3892            submatrix (same value is used for all local rows).
3893 -  o_nnz - array containing the number of nonzeros in the various rows of the
3894            OFF-DIAGONAL portion of the local submatrix (possibly different for
3895            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3896            structure. The size of this array is equal to the number
3897            of local rows, i.e 'm'.
3898 
3899    If the *_nnz parameter is given then the *_nz parameter is ignored
3900 
3901    The AIJ format (also called the Yale sparse matrix format or
3902    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3903    storage.  The stored row and column indices begin with zero.
3904    See Users-Manual: ch_mat for details.
3905 
3906    The parallel matrix is partitioned such that the first m0 rows belong to
3907    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3908    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3909 
3910    The DIAGONAL portion of the local submatrix of a processor can be defined
3911    as the submatrix which is obtained by extracting the part corresponding to
3912    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3913    first row that belongs to the processor, r2 is the last row belonging to
3914    this processor, and c1-c2 is the range of indices of the local part of a
3915    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3916    common case of a square matrix, the row and column ranges are the same and
3917    the DIAGONAL part is also square. The remaining portion of the local
3918    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3919 
3920    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3921 
3922    You can call MatGetInfo() to get information on how effective the preallocation was;
3923    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3924    You can also run with the option -info and look for messages with the string
3925    malloc in them to see if additional memory allocation was needed.
3926 
3927    Example usage:
3928 
3929    Consider the following 8x8 matrix with 34 non-zero values, that is
3930    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3931    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3932    as follows:
3933 
3934 .vb
3935             1  2  0  |  0  3  0  |  0  4
3936     Proc0   0  5  6  |  7  0  0  |  8  0
3937             9  0 10  | 11  0  0  | 12  0
3938     -------------------------------------
3939            13  0 14  | 15 16 17  |  0  0
3940     Proc1   0 18  0  | 19 20 21  |  0  0
3941             0  0  0  | 22 23  0  | 24  0
3942     -------------------------------------
3943     Proc2  25 26 27  |  0  0 28  | 29  0
3944            30  0  0  | 31 32 33  |  0 34
3945 .ve
3946 
3947    This can be represented as a collection of submatrices as:
3948 
3949 .vb
3950       A B C
3951       D E F
3952       G H I
3953 .ve
3954 
3955    Where the submatrices A,B,C are owned by proc0, D,E,F are
3956    owned by proc1, G,H,I are owned by proc2.
3957 
3958    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3959    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3960    The 'M','N' parameters are 8,8, and have the same values on all procs.
3961 
3962    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3963    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3964    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3965    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3966    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3967    matrix, and [DF] as another SeqAIJ matrix.
3968 
3969    When d_nz, o_nz parameters are specified, d_nz storage elements are
3970    allocated for every row of the local diagonal submatrix, and o_nz
3971    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3972    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
3973    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3974    In this case, the values of d_nz,o_nz are:
3975 .vb
3976      proc0 : dnz = 2, o_nz = 2
3977      proc1 : dnz = 3, o_nz = 2
3978      proc2 : dnz = 1, o_nz = 4
3979 .ve
3980    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3981    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3982    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3983    34 values.
3984 
3985    When d_nnz, o_nnz parameters are specified, the storage is specified
3986    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3987    In the above case the values for d_nnz,o_nnz are:
3988 .vb
3989      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3990      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3991      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3992 .ve
3993    Here the space allocated is the sum of all the above values, i.e., 34, and
3994    hence pre-allocation is perfect.
3995 
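   For instance, on proc0 in the example above the call might look like the following sketch (the arrays simply
   hold the per-row counts listed above; B is assumed to be a MATMPIAIJ matrix with 3 local rows):
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
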
3996    Level: intermediate
3997 
3998 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3999           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4000 @*/
4001 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4002 {
4003   PetscErrorCode ierr;
4004 
4005   PetscFunctionBegin;
4006   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4007   PetscValidType(B,1);
4008   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4009   PetscFunctionReturn(0);
4010 }
4011 
4012 /*@
4013      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4014          in standard CSR format.
4015 
4016    Collective
4017 
4018    Input Parameters:
4019 +  comm - MPI communicator
4020 .  m - number of local rows (Cannot be PETSC_DECIDE)
4021 .  n - This value should be the same as the local size used in creating the
4022        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4023        calculated if N is given) For square matrices n is almost always m.
4024 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4025 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4026 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4027 .   j - column indices
4028 -   a - matrix values
4029 
4030    Output Parameter:
4031 .   mat - the matrix
4032 
4033    Level: intermediate
4034 
4035    Notes:
4036        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4037      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4038      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4039 
4040        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4041 
4042        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4043 
4044        The format used for the sparse matrix input is equivalent to a
4045     row-major ordering, i.e., for the following matrix, the input data expected is
4046     as shown below.
4047 
4048 $        1 0 0
4049 $        2 0 3     P0
4050 $       -------
4051 $        4 5 6     P1
4052 $
4053 $     Process0 [P0]: rows_owned=[0,1]
4054 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4055 $        j =  {0,0,2}  [size = 3]
4056 $        v =  {1,2,3}  [size = 3]
4057 $
4058 $     Process1 [P1]: rows_owned=[2]
4059 $        i =  {0,3}    [size = nrow+1  = 1+1]
4060 $        j =  {0,1,2}  [size = 3]
4061 $        v =  {4,5,6}  [size = 3]
4062 
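     On process P0 of the example above the call might look like this sketch (error checking omitted; i, j, v
     are the arrays shown above):
.vb
      Mat A;
      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
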
4063 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4064           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4065 @*/
4066 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4067 {
4068   PetscErrorCode ierr;
4069 
4070   PetscFunctionBegin;
4071   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4072   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4073   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4074   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4075   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4076   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4077   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4078   PetscFunctionReturn(0);
4079 }
4080 
4081 /*@
4082      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4083      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4084          in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4085    Collective
4086 
4087    Input Parameters:
4088 +  mat - the matrix
4089 .  m - number of local rows (Cannot be PETSC_DECIDE)
4090 .  n - This value should be the same as the local size used in creating the
4091        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4092        calculated if N is given) For square matrices n is almost always m.
4093 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4094 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4095 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4096 .  J - column indices
4097 -  v - matrix values
4098 
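   A typical use is sketched below (assuming mat was created with MatCreateMPIAIJWithArrays() from the same Ii
   and J arrays, and only the entries of v have been recomputed):
.vb
     MatUpdateMPIAIJWithArrays(mat,m,n,PETSC_DETERMINE,PETSC_DETERMINE,Ii,J,v);
.ve
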
4099    Level: intermediate
4100 
4101 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4102           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4103 @*/
4104 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4105 {
4106   PetscErrorCode ierr;
4107   PetscInt       cstart,nnz,i,j;
4108   PetscInt       *ld;
4109   PetscBool      nooffprocentries;
4110   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4111   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4112   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4113   const PetscInt *Adi = Ad->i;
4114   PetscInt       ldi,Iii,md;
4115 
4116   PetscFunctionBegin;
4117   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4118   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4119   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4120   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4121 
4122   cstart = mat->cmap->rstart;
4123   if (!Aij->ld) {
4124     /* count number of entries below block diagonal */
4125     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4126     Aij->ld = ld;
4127     for (i=0; i<m; i++) {
4128       nnz  = Ii[i+1]- Ii[i];
4129       j     = 0;
4130       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz first so J[j] is never read past the end of this row */
4131       J    += nnz;
4132       ld[i] = j;
4133     }
4134   } else {
4135     ld = Aij->ld;
4136   }
4137 
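  /* Copy the new values row by row: the first ld[i] entries of a row are the off-diagonal entries with
     column < cstart, the next md entries belong to the diagonal block, and the remaining nnz-ld[i]-md
     entries are the off-diagonal entries with column >= cend */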
4138   for (i=0; i<m; i++) {
4139     nnz  = Ii[i+1]- Ii[i];
4140     Iii  = Ii[i];
4141     ldi  = ld[i];
4142     md   = Adi[i+1]-Adi[i];
4143     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4144     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4145     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4146     ad  += md;
4147     ao  += nnz - md;
4148   }
4149   nooffprocentries      = mat->nooffprocentries;
4150   mat->nooffprocentries = PETSC_TRUE;
4151   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4152   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4153   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4154   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4155   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4156   mat->nooffprocentries = nooffprocentries;
4157   PetscFunctionReturn(0);
4158 }
4159 
4160 /*@C
4161    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4162    (the default parallel PETSc format).  For good matrix assembly performance
4163    the user should preallocate the matrix storage by setting the parameters
4164    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4165    performance can be increased by more than a factor of 50.
4166 
4167    Collective
4168 
4169    Input Parameters:
4170 +  comm - MPI communicator
4171 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4172            This value should be the same as the local size used in creating the
4173            y vector for the matrix-vector product y = Ax.
4174 .  n - This value should be the same as the local size used in creating the
4175        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4176        calculated if N is given) For square matrices n is almost always m.
4177 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4178 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4179 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4180            (same value is used for all local rows)
4181 .  d_nnz - array containing the number of nonzeros in the various rows of the
4182            DIAGONAL portion of the local submatrix (possibly different for each row)
4183            or NULL, if d_nz is used to specify the nonzero structure.
4184            The size of this array is equal to the number of local rows, i.e 'm'.
4185 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4186            submatrix (same value is used for all local rows).
4187 -  o_nnz - array containing the number of nonzeros in the various rows of the
4188            OFF-DIAGONAL portion of the local submatrix (possibly different for
4189            each row) or NULL, if o_nz is used to specify the nonzero
4190            structure. The size of this array is equal to the number
4191            of local rows, i.e 'm'.
4192 
4193    Output Parameter:
4194 .  A - the matrix
4195 
4196    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4197    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4198    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4199 
4200    Notes:
4201    If the *_nnz parameter is given then the *_nz parameter is ignored
4202 
4203    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4204    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4205    storage requirements for this matrix.
4206 
4207    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4208    processor then it must be used on all processors that share the object for
4209    that argument.
4210 
4211    The user MUST specify either the local or global matrix dimensions
4212    (possibly both).
4213 
4214    The parallel matrix is partitioned across processors such that the
4215    first m0 rows belong to process 0, the next m1 rows belong to
4216    process 1, the next m2 rows belong to process 2 etc.. where
4217    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4218    values corresponding to [m x N] submatrix.
4219 
4220    The columns are logically partitioned with the n0 columns belonging
4221    to 0th partition, the next n1 columns belonging to the next
4222    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4223 
4224    The DIAGONAL portion of the local submatrix on any given processor
4225    is the submatrix corresponding to the rows and columns m,n
4226    corresponding to the given processor, i.e., the diagonal matrix on
4227    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4228    etc. The remaining portion of the local submatrix [m x (N-n)]
4229    constitute the OFF-DIAGONAL portion. The example below better
4230    illustrates this concept.
4231 
4232    For a square global matrix we define each processor's diagonal portion
4233    to be its local rows and the corresponding columns (a square submatrix);
4234    each processor's off-diagonal portion encompasses the remainder of the
4235    local matrix (a rectangular submatrix).
4236 
4237    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4238 
4239    When calling this routine with a single process communicator, a matrix of
4240    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4241    type of communicator, use the construction mechanism
4242 .vb
4243      MatCreate(...,&A);
4244      MatSetType(A,MATMPIAIJ);
4245      MatSetSizes(A, m,n,M,N);
4246      MatMPIAIJSetPreallocation(A,...);
4247 .ve
4248 
4251    By default, this format uses inodes (identical nodes) when possible.
4252    We search for consecutive rows with the same nonzero structure, thereby
4253    reusing matrix information to achieve increased efficiency.
4254 
4255    Options Database Keys:
4256 +  -mat_no_inode  - Do not use inodes
4257 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4258 
4261    Example usage:
4262 
4263    Consider the following 8x8 matrix with 34 non-zero values, that is
4264    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4265    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4266    as follows
4267 
4268 .vb
4269             1  2  0  |  0  3  0  |  0  4
4270     Proc0   0  5  6  |  7  0  0  |  8  0
4271             9  0 10  | 11  0  0  | 12  0
4272     -------------------------------------
4273            13  0 14  | 15 16 17  |  0  0
4274     Proc1   0 18  0  | 19 20 21  |  0  0
4275             0  0  0  | 22 23  0  | 24  0
4276     -------------------------------------
4277     Proc2  25 26 27  |  0  0 28  | 29  0
4278            30  0  0  | 31 32 33  |  0 34
4279 .ve
4280 
4281    This can be represented as a collection of submatrices as
4282 
4283 .vb
4284       A B C
4285       D E F
4286       G H I
4287 .ve
4288 
4289    Where the submatrices A,B,C are owned by proc0, D,E,F are
4290    owned by proc1, G,H,I are owned by proc2.
4291 
4292    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4293    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4294    The 'M','N' parameters are 8,8, and have the same values on all procs.
4295 
4296    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4297    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4298    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4299    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4300    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4301    matrix, and [DF] as another SeqAIJ matrix.
4302 
4303    When d_nz, o_nz parameters are specified, d_nz storage elements are
4304    allocated for every row of the local diagonal submatrix, and o_nz
4305    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4306    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4307    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4308    In this case, the values of d_nz,o_nz are
4309 .vb
4310      proc0 : dnz = 2, o_nz = 2
4311      proc1 : dnz = 3, o_nz = 2
4312      proc2 : dnz = 1, o_nz = 4
4313 .ve
4314    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4315    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4316    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4317    34 values.
4318 
4319    When d_nnz, o_nnz parameters are specified, the storage is specified
4320    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4321    In the above case the values for d_nnz,o_nnz are
4322 .vb
4323      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4324      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4325      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4326 .ve
4327    Here the space allocated is the sum of all the above values, i.e., 34, and
4328    hence pre-allocation is perfect.
4329 
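   With the d_nnz/o_nnz values above, the call on proc0 might look like this sketch (error checking omitted):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
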
4330    Level: intermediate
4331 
4332 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4333           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4334 @*/
4335 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4336 {
4337   PetscErrorCode ierr;
4338   PetscMPIInt    size;
4339 
4340   PetscFunctionBegin;
4341   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4342   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4343   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4344   if (size > 1) {
4345     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4346     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4347   } else {
4348     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4349     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4350   }
4351   PetscFunctionReturn(0);
4352 }
4353 
4354 /*@C
4355   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4356 
4357   Not collective
4358 
4359   Input Parameter:
4360 . A - The MPIAIJ matrix
4361 
4362   Output Parameters:
4363 + Ad - The local diagonal block as a SeqAIJ matrix
4364 . Ao - The local off-diagonal block as a SeqAIJ matrix
4365 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4366 
4367   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4368   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4369   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4370   local column numbers to global column numbers in the original matrix.
4371 
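  A minimal usage sketch (the returned matrices and colmap are internal to A and must not be destroyed or freed
  by the caller):
.vb
    Mat            Ad,Ao;
    const PetscInt *colmap;
    MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve
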
4372   Level: intermediate
4373 
4374 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4375 @*/
4376 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4377 {
4378   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4379   PetscBool      flg;
4380   PetscErrorCode ierr;
4381 
4382   PetscFunctionBegin;
4383   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4384   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4385   if (Ad)     *Ad     = a->A;
4386   if (Ao)     *Ao     = a->B;
4387   if (colmap) *colmap = a->garray;
4388   PetscFunctionReturn(0);
4389 }
4390 
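/* Concatenates the sequential matrices inmat owned by the processes of comm (stacked by rows in rank order)
   into the parallel AIJ matrix *outmat with n local columns on each process */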
4391 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4392 {
4393   PetscErrorCode ierr;
4394   PetscInt       m,N,i,rstart,nnz,Ii;
4395   PetscInt       *indx;
4396   PetscScalar    *values;
4397 
4398   PetscFunctionBegin;
4399   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4400   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4401     PetscInt       *dnz,*onz,sum,bs,cbs;
4402 
4403     if (n == PETSC_DECIDE) {
4404       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4405     }
4406     /* Check sum(n) = N */
4407     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4408     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4409 
4410     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4411     rstart -= m;
4412 
4413     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4414     for (i=0; i<m; i++) {
4415       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4416       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4417       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4418     }
4419 
4420     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4421     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4422     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4423     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4424     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4425     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4426     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4427     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4428   }
4429 
4430   /* numeric phase */
4431   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4432   for (i=0; i<m; i++) {
4433     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4434     Ii   = i + rstart;
4435     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4436     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4437   }
4438   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4439   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4440   PetscFunctionReturn(0);
4441 }
4442 
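/* Writes the local rows of A (as an m x N sequential matrix) to the binary viewer file <outfile>.<rank>, one file per process */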
4443 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4444 {
4445   PetscErrorCode    ierr;
4446   PetscMPIInt       rank;
4447   PetscInt          m,N,i,rstart,nnz;
4448   size_t            len;
4449   const PetscInt    *indx;
4450   PetscViewer       out;
4451   char              *name;
4452   Mat               B;
4453   const PetscScalar *values;
4454 
4455   PetscFunctionBegin;
4456   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4457   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4458   /* Should this be the type of the diagonal block of A? */
4459   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4460   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4461   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4462   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4463   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4464   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4465   for (i=0; i<m; i++) {
4466     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4467     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4468     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4469   }
4470   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4471   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4472 
4473   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4474   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4475   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr);  /* leave room for ".<rank>" even on large communicators */
4476   ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
4477   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4478   ierr = PetscFree(name);CHKERRQ(ierr);
4479   ierr = MatView(B,out);CHKERRQ(ierr);
4480   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4481   ierr = MatDestroy(&B);CHKERRQ(ierr);
4482   PetscFunctionReturn(0);
4483 }
4484 
4485 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4486 {
4487   PetscErrorCode      ierr;
4488   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4489 
4490   PetscFunctionBegin;
4491   if (!merge) PetscFunctionReturn(0);
4492   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4493   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4494   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4495   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4496   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4497   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4498   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4499   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4500   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4501   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4502   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4503   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4504   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4505   ierr = PetscFree(merge);CHKERRQ(ierr);
4506   PetscFunctionReturn(0);
4507 }
4508 
4509 #include <../src/mat/utils/freespace.h>
4510 #include <petscbt.h>
4511 
4512 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4513 {
4514   PetscErrorCode      ierr;
4515   MPI_Comm            comm;
4516   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4517   PetscMPIInt         size,rank,taga,*len_s;
4518   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4519   PetscInt            proc,m;
4520   PetscInt            **buf_ri,**buf_rj;
4521   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4522   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4523   MPI_Request         *s_waits,*r_waits;
4524   MPI_Status          *status;
4525   MatScalar           *aa=a->a;
4526   MatScalar           **abuf_r,*ba_i;
4527   Mat_Merge_SeqsToMPI *merge;
4528   PetscContainer      container;
4529 
4530   PetscFunctionBegin;
4531   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4532   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4533 
4534   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4535   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4536 
4537   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4538   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4539   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4540 
4541   bi     = merge->bi;
4542   bj     = merge->bj;
4543   buf_ri = merge->buf_ri;
4544   buf_rj = merge->buf_rj;
4545 
4546   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4547   owners = merge->rowmap->range;
4548   len_s  = merge->len_s;
4549 
4550   /* send and recv matrix values */
4551   /*-----------------------------*/
4552   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4553   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4554 
4555   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4556   for (proc=0,k=0; proc<size; proc++) {
4557     if (!len_s[proc]) continue;
4558     i    = owners[proc];
4559     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4560     k++;
4561   }
4562 
4563   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4564   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4565   ierr = PetscFree(status);CHKERRQ(ierr);
4566 
4567   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4568   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4569 
4570   /* insert mat values of mpimat */
4571   /*----------------------------*/
4572   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4573   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4574 
4575   for (k=0; k<merge->nrecv; k++) {
4576     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4577     nrows       = *(buf_ri_k[k]);
4578     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4579     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4580   }
4581 
4582   /* set values of ba */
4583   m = merge->rowmap->n;
4584   for (i=0; i<m; i++) {
4585     arow = owners[rank] + i;
4586     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4587     bnzi = bi[i+1] - bi[i];
4588     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4589 
4590     /* add local non-zero vals of this proc's seqmat into ba */
4591     anzi   = ai[arow+1] - ai[arow];
4592     aj     = a->j + ai[arow];
4593     aa     = a->a + ai[arow];
4594     nextaj = 0;
4595     for (j=0; nextaj<anzi; j++) {
4596       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4597         ba_i[j] += aa[nextaj++];
4598       }
4599     }
4600 
4601     /* add received vals into ba */
4602     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4603       /* i-th row */
4604       if (i == *nextrow[k]) {
4605         anzi   = *(nextai[k]+1) - *nextai[k];
4606         aj     = buf_rj[k] + *(nextai[k]);
4607         aa     = abuf_r[k] + *(nextai[k]);
4608         nextaj = 0;
4609         for (j=0; nextaj<anzi; j++) {
4610           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4611             ba_i[j] += aa[nextaj++];
4612           }
4613         }
4614         nextrow[k]++; nextai[k]++;
4615       }
4616     }
4617     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4618   }
4619   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4620   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4621 
4622   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4623   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4624   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4625   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4626   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4627   PetscFunctionReturn(0);
4628 }
4629 
4630 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4631 {
4632   PetscErrorCode      ierr;
4633   Mat                 B_mpi;
4634   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4635   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4636   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4637   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4638   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4639   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4640   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4641   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4642   MPI_Status          *status;
4643   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4644   PetscBT             lnkbt;
4645   Mat_Merge_SeqsToMPI *merge;
4646   PetscContainer      container;
4647 
4648   PetscFunctionBegin;
4649   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4650 
4651   /* make sure it is a PETSc comm */
4652   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4653   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4654   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4655 
4656   ierr = PetscNew(&merge);CHKERRQ(ierr);
4657   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4658 
4659   /* determine row ownership */
4660   /*---------------------------------------------------------*/
4661   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4662   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4663   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4664   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4665   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4666   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4667   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4668 
4669   m      = merge->rowmap->n;
4670   owners = merge->rowmap->range;
4671 
4672   /* determine the number of messages to send, their lengths */
4673   /*---------------------------------------------------------*/
4674   len_s = merge->len_s;
4675 
4676   len          = 0; /* length of buf_si[] */
4677   merge->nsend = 0;
4678   for (proc=0; proc<size; proc++) {
4679     len_si[proc] = 0;
4680     if (proc == rank) {
4681       len_s[proc] = 0;
4682     } else {
4683       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4684       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4685     }
4686     if (len_s[proc]) {
4687       merge->nsend++;
4688       nrows = 0;
4689       for (i=owners[proc]; i<owners[proc+1]; i++) {
4690         if (ai[i+1] > ai[i]) nrows++;
4691       }
4692       len_si[proc] = 2*(nrows+1);
4693       len         += len_si[proc];
4694     }
4695   }
4696 
4697   /* determine the number and length of messages to receive for ij-structure */
4698   /*-------------------------------------------------------------------------*/
4699   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4700   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4701 
4702   /* post the Irecv of j-structure */
4703   /*-------------------------------*/
4704   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4705   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4706 
4707   /* post the Isend of j-structure */
4708   /*--------------------------------*/
4709   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4710 
4711   for (proc=0, k=0; proc<size; proc++) {
4712     if (!len_s[proc]) continue;
4713     i    = owners[proc];
4714     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4715     k++;
4716   }
4717 
4718   /* receives and sends of j-structure are complete */
4719   /*------------------------------------------------*/
4720   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4721   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4722 
4723   /* send and recv i-structure */
4724   /*---------------------------*/
4725   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4726   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4727 
4728   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4729   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4730   for (proc=0,k=0; proc<size; proc++) {
4731     if (!len_s[proc]) continue;
4732     /* form outgoing message for i-structure:
4733          buf_si[0]:                 nrows to be sent
4734                [1:nrows]:           row index (global)
4735                [nrows+1:2*nrows+1]: i-structure index
4736     */
4737     /*-------------------------------------------*/
4738     nrows       = len_si[proc]/2 - 1;
4739     buf_si_i    = buf_si + nrows+1;
4740     buf_si[0]   = nrows;
4741     buf_si_i[0] = 0;
4742     nrows       = 0;
4743     for (i=owners[proc]; i<owners[proc+1]; i++) {
4744       anzi = ai[i+1] - ai[i];
4745       if (anzi) {
4746         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4747         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4748         nrows++;
4749       }
4750     }
4751     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4752     k++;
4753     buf_si += len_si[proc];
4754   }
4755 
4756   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4757   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4758 
4759   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4760   for (i=0; i<merge->nrecv; i++) {
4761     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4762   }
4763 
4764   ierr = PetscFree(len_si);CHKERRQ(ierr);
4765   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4766   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4767   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4768   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4769   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4770   ierr = PetscFree(status);CHKERRQ(ierr);
4771 
4772   /* compute a local seq matrix in each processor */
4773   /*----------------------------------------------*/
4774   /* allocate bi array and free space for accumulating nonzero column info */
4775   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4776   bi[0] = 0;
4777 
4778   /* create and initialize a linked list */
4779   nlnk = N+1;
4780   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4781 
4782   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4783   len  = ai[owners[rank+1]] - ai[owners[rank]];
4784   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4785 
4786   current_space = free_space;
4787 
4788   /* determine symbolic info for each local row */
4789   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4790 
4791   for (k=0; k<merge->nrecv; k++) {
4792     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4793     nrows       = *buf_ri_k[k];
4794     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4795     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4796   }
4797 
4798   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4799   len  = 0;
4800   for (i=0; i<m; i++) {
4801     bnzi = 0;
4802     /* add local non-zero cols of this proc's seqmat into lnk */
4803     arow  = owners[rank] + i;
4804     anzi  = ai[arow+1] - ai[arow];
4805     aj    = a->j + ai[arow];
4806     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4807     bnzi += nlnk;
4808     /* add received col data into lnk */
4809     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4810       if (i == *nextrow[k]) { /* i-th row */
4811         anzi  = *(nextai[k]+1) - *nextai[k];
4812         aj    = buf_rj[k] + *nextai[k];
4813         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4814         bnzi += nlnk;
4815         nextrow[k]++; nextai[k]++;
4816       }
4817     }
4818     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4819 
4820     /* if free space is not available, make more free space */
4821     if (current_space->local_remaining<bnzi) {
4822       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4823       nspacedouble++;
4824     }
4825     /* copy data into free space, then initialize lnk */
4826     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4827     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4828 
4829     current_space->array           += bnzi;
4830     current_space->local_used      += bnzi;
4831     current_space->local_remaining -= bnzi;
4832 
4833     bi[i+1] = bi[i] + bnzi;
4834   }
4835 
4836   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4837 
4838   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4839   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4840   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4841 
4842   /* create symbolic parallel matrix B_mpi */
4843   /*---------------------------------------*/
4844   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4845   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4846   if (n==PETSC_DECIDE) {
4847     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4848   } else {
4849     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4850   }
4851   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4852   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4853   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4854   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4855   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4856 
4857   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4858   B_mpi->assembled  = PETSC_FALSE;
4859   merge->bi         = bi;
4860   merge->bj         = bj;
4861   merge->buf_ri     = buf_ri;
4862   merge->buf_rj     = buf_rj;
4863   merge->coi        = NULL;
4864   merge->coj        = NULL;
4865   merge->owners_co  = NULL;
4866 
4867   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4868 
4869   /* attach the supporting struct to B_mpi for reuse */
4870   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4871   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4872   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4873   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4874   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4875   *mpimat = B_mpi;
4876 
4877   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4878   PetscFunctionReturn(0);
4879 }
4880 
4881 /*@C
4882       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4883                  matrices from each processor
4884 
4885     Collective
4886 
4887    Input Parameters:
4888 +    comm - the communicator the parallel matrix will live on
4889 .    seqmat - the input sequential matrix on each process
4890 .    m - number of local rows (or PETSC_DECIDE)
4891 .    n - number of local columns (or PETSC_DECIDE)
4892 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4893 
4894    Output Parameter:
4895 .    mpimat - the parallel matrix generated
4896 
4897     Level: advanced
4898 
4899    Notes:
4900      The dimensions of the sequential matrix in each processor MUST be the same.
4901      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4902      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
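
     A typical calling sequence is sketched below (assuming the nonzero pattern of seqmat does not change
     between calls):
.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... change the numerical values of seqmat ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve
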
4903 @*/
4904 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4905 {
4906   PetscErrorCode ierr;
4907   PetscMPIInt    size;
4908 
4909   PetscFunctionBegin;
4910   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4911   if (size == 1) {
4912     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4913     if (scall == MAT_INITIAL_MATRIX) {
4914       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4915     } else {
4916       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4917     }
4918     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4919     PetscFunctionReturn(0);
4920   }
4921   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4922   if (scall == MAT_INITIAL_MATRIX) {
4923     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4924   }
4925   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4926   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4927   PetscFunctionReturn(0);
4928 }
4929 
4930 /*@
4931      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4932           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4933           with MatGetSize().
4934 
4935     Not Collective
4936 
4937    Input Parameters:
4938 +    A - the matrix
4939 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4940 
4941    Output Parameter:
4942 .    A_loc - the local sequential matrix generated
4943 
4944     Level: developer
4945 
4946    Notes:
4947      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4948      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4949      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4950      modify the values of the returned A_loc.
4951 
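     A typical usage sketch (error checking omitted; the caller destroys A_loc when done):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... later, after the numerical values of A have changed but not its nonzero pattern ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
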
4952 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4953 
4954 @*/
4955 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4956 {
4957   PetscErrorCode ierr;
4958   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4959   Mat_SeqAIJ     *mat,*a,*b;
4960   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4961   MatScalar      *aa,*ba,*cam;
4962   PetscScalar    *ca;
4963   PetscMPIInt    size;
4964   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4965   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4966   PetscBool      match;
4967 
4968   PetscFunctionBegin;
4969   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4970   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4971   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4972   if (size == 1) {
4973     if (scall == MAT_INITIAL_MATRIX) {
4974       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4975       *A_loc = mpimat->A;
4976     } else if (scall == MAT_REUSE_MATRIX) {
4977       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4978     }
4979     PetscFunctionReturn(0);
4980   }
4981 
4982   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4983   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4984   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4985   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4986   aa = a->a; ba = b->a;
4987   if (scall == MAT_INITIAL_MATRIX) {
4988     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4989     ci[0] = 0;
4990     for (i=0; i<am; i++) {
4991       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4992     }
4993     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4994     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4995     k    = 0;
4996     for (i=0; i<am; i++) {
4997       ncols_o = bi[i+1] - bi[i];
4998       ncols_d = ai[i+1] - ai[i];
4999       /* off-diagonal portion of A */
5000       for (jo=0; jo<ncols_o; jo++) {
5001         col = cmap[*bj];
5002         if (col >= cstart) break;
5003         cj[k]   = col; bj++;
5004         ca[k++] = *ba++;
5005       }
5006       /* diagonal portion of A */
5007       for (j=0; j<ncols_d; j++) {
5008         cj[k]   = cstart + *aj++;
5009         ca[k++] = *aa++;
5010       }
5011       /* off-diagonal portion of A */
5012       for (j=jo; j<ncols_o; j++) {
5013         cj[k]   = cmap[*bj++];
5014         ca[k++] = *ba++;
5015       }
5016     }
5017     /* put together the new matrix */
5018     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5019     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5020     /* Since these are PETSc arrays, change flags to free them as necessary. */
5021     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5022     mat->free_a  = PETSC_TRUE;
5023     mat->free_ij = PETSC_TRUE;
5024     mat->nonew   = 0;
5025   } else if (scall == MAT_REUSE_MATRIX) {
5026     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5027     ci = mat->i; cj = mat->j; cam = mat->a;
5028     for (i=0; i<am; i++) {
5029       /* off-diagonal portion of A */
5030       ncols_o = bi[i+1] - bi[i];
5031       for (jo=0; jo<ncols_o; jo++) {
5032         col = cmap[*bj];
5033         if (col >= cstart) break;
5034         *cam++ = *ba++; bj++;
5035       }
5036       /* diagonal portion of A */
5037       ncols_d = ai[i+1] - ai[i];
5038       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5039       /* off-diagonal portion of A */
5040       for (j=jo; j<ncols_o; j++) {
5041         *cam++ = *ba++; bj++;
5042       }
5043     }
5044   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5045   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5046   PetscFunctionReturn(0);
5047 }
5048 
5049 /*@C
5050      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5051 
5052     Not Collective
5053 
5054    Input Parameters:
5055 +    A - the matrix
5056 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5057 -    row, col - index sets of rows and columns to extract (or NULL)
5058 
5059    Output Parameter:
5060 .    A_loc - the local sequential matrix generated
5061 
5062     Level: developer
5063 
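   Example usage (a minimal sketch; A is assumed to be an already assembled MATMPIAIJ matrix; passing NULL for row and col takes all local rows and all nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... use A_loc ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
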
5064 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5065 
5066 @*/
5067 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5068 {
5069   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5070   PetscErrorCode ierr;
5071   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5072   IS             isrowa,iscola;
5073   Mat            *aloc;
5074   PetscBool      match;
5075 
5076   PetscFunctionBegin;
5077   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5078   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5079   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5080   if (!row) {
5081     start = A->rmap->rstart; end = A->rmap->rend;
5082     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5083   } else {
5084     isrowa = *row;
5085   }
5086   if (!col) {
5087     start = A->cmap->rstart;
5088     cmap  = a->garray;
5089     nzA   = a->A->cmap->n;
5090     nzB   = a->B->cmap->n;
5091     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5092     ncols = 0;
5093     for (i=0; i<nzB; i++) {
5094       if (cmap[i] < start) idx[ncols++] = cmap[i];
5095       else break;
5096     }
5097     imark = i;
5098     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5099     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5100     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5101   } else {
5102     iscola = *col;
5103   }
5104   if (scall != MAT_INITIAL_MATRIX) {
5105     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5106     aloc[0] = *A_loc;
5107   }
5108   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5109   if (!col) { /* attach global id of condensed columns */
5110     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5111   }
5112   *A_loc = aloc[0];
5113   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5114   if (!row) {
5115     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5116   }
5117   if (!col) {
5118     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5119   }
5120   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5121   PetscFunctionReturn(0);
5122 }
5123 
5124 /*
5125  * Create a sequential AIJ matrix based on row indices: once a row index is matched, the whole row (all of its columns) is extracted.
5126  * Rows may be local or remote. The routine is designed to be memory scalable so that nothing depends
5127  * on a global size.
5128  * */
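/*
 * A minimal usage sketch (hypothetical; 'nr' and 'globalrows' are placeholders for the number of requested rows
 * and their global indices in P, which may be owned by other ranks):
 *
 *   IS  rows;
 *   Mat P_oth = NULL;
 *   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)P),nr,globalrows,PETSC_COPY_VALUES,&rows);CHKERRQ(ierr);
 *   ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,&P_oth);CHKERRQ(ierr);
 *   ierr = ISDestroy(&rows);CHKERRQ(ierr);
 *
 * The PetscSF objects "diagsf" and "offdiagsf" are composed with P_oth so that a later update of the
 * values can reuse the same communication pattern.
 */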
5129 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5130 {
5131   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5132   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5133   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5134   PetscMPIInt              owner;
5135   PetscSFNode              *iremote,*oiremote;
5136   const PetscInt           *lrowindices;
5137   PetscErrorCode           ierr;
5138   PetscSF                  sf,osf;
5139   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5140   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5141   MPI_Comm                 comm;
5142   ISLocalToGlobalMapping   mapping;
5143 
5144   PetscFunctionBegin;
5145   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5146   /* plocalsize is the number of roots
5147    * nrows is the number of leaves
5148    * */
5149   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5150   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5151   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5152   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5153   for (i=0;i<nrows;i++) {
5154     /* Find a remote index and an owner for a row
5155      * The row could be local or remote
5156      * */
5157     owner = 0;
5158     lidx  = 0;
5159     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5160     iremote[i].index = lidx;
5161     iremote[i].rank  = owner;
5162   }
5163   /* Create an SF to communicate how many nonzero columns each row has */
5164   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5165    * SF will figure out the number of nonzero columns for each row, and their
5166    * offsets
5167    * */
5168   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5169   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5170   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5171 
5172   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5173   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5174   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5175   roffsets[0] = 0;
5176   roffsets[1] = 0;
5177   for (i=0;i<plocalsize;i++) {
5178     /* diag */
5179     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5180     /* off diag */
5181     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5182     /* compute offsets so that we know the relative location of each row */
5183     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5184     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5185   }
5186   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5187   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5188   /* 'r' means root, and 'l' means leaf */
5189   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5190   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5191   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5192   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5193   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5194   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5195   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5196   dntotalcols = 0;
5197   ontotalcols = 0;
5198   ncol = 0;
5199   for (i=0;i<nrows;i++) {
5200     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5201     ncol = PetscMax(pnnz[i],ncol);
5202     /* diag */
5203     dntotalcols += nlcols[i*2+0];
5204     /* off diag */
5205     ontotalcols += nlcols[i*2+1];
5206   }
5207   /* We do not need to figure out the exact number of columns
5208    * since all the calculations are done by going through the raw data
5209    * */
5210   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5211   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5212   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5213   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5214   /* diag */
5215   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5216   /* off diag */
5217   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5218   /* diag */
5219   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5220   /* off diag */
5221   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5222   dntotalcols = 0;
5223   ontotalcols = 0;
5224   ntotalcols  = 0;
5225   for (i=0;i<nrows;i++) {
5226     owner = 0;
5227     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5228     /* Set iremote for diag matrix */
5229     for (j=0;j<nlcols[i*2+0];j++) {
5230       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5231       iremote[dntotalcols].rank    = owner;
5232       /* P_oth is SeqAIJ, so ilocal needs to point into the first (contiguous) part of its memory */
5233       ilocal[dntotalcols++]        = ntotalcols++;
5234     }
5235     /* off diag */
5236     for (j=0;j<nlcols[i*2+1];j++) {
5237       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5238       oiremote[ontotalcols].rank    = owner;
5239       oilocal[ontotalcols++]        = ntotalcols++;
5240     }
5241   }
5242   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5243   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5244   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5245   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5246   /* P serves as the roots and P_oth as the leaves
5247    * Diagonal part
5248    * */
5249   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5250   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5251   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5252 
5253   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5254   /* Off diag */
5255   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5256   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5257   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5258   /* We operate on the matrix internal data to save memory */
5259   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5260   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5261   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5262   /* Convert to global indices for diag matrix */
5263   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5264   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5265   /* We want P_oth to store global indices */
5266   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5267   /* Use memory scalable approach */
5268   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5269   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5270   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5271   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5272   /* Convert back to local indices */
5273   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5274   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5275   nout = 0;
5276   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5277   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5278   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5279   /* Exchange values */
5280   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5281   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5282   /* Stop PETSc from shrinking memory */
5283   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5284   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5285   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5286   /* Attach the PetscSF objects to P_oth so that we can reuse them later */
5287   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5288   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5289   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5290   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5291   PetscFunctionReturn(0);
5292 }
5293 
5294 /*
5295  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A
5296  * This supports MPIAIJ and MAIJ
5297  * */
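/*
 * A minimal usage sketch (assuming A and P are assembled MPIAIJ matrices and dof = 1):
 *
 *   Mat P_oth = NULL;
 *   ierr = MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_INITIAL_MATRIX,&P_oth);CHKERRQ(ierr);
 *   ... later, after the values of P change but not its nonzero pattern ...
 *   ierr = MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_REUSE_MATRIX,&P_oth);CHKERRQ(ierr);
 *   ierr = MatDestroy(&P_oth);CHKERRQ(ierr);
 */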
5298 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5299 {
5300   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5301   Mat_SeqAIJ            *p_oth;
5302   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5303   IS                    rows,map;
5304   PetscHMapI            hamp;
5305   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5306   MPI_Comm              comm;
5307   PetscSF               sf,osf;
5308   PetscBool             has;
5309   PetscErrorCode        ierr;
5310 
5311   PetscFunctionBegin;
5312   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5313   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5314   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5315    *  and then create a submatrix (that often is an overlapping matrix)
5316    * */
5317   if (reuse == MAT_INITIAL_MATRIX) {
5318     /* Use a hash table to figure out unique keys */
5319     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5320     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5321     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5322     count = 0;
5323     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5324     for (i=0;i<a->B->cmap->n;i++) {
5325       key  = a->garray[i]/dof;
5326       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5327       if (!has) {
5328         mapping[i] = count;
5329         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5330       } else {
5331         /* The current 'i' has the same key as the previous step */
5332         mapping[i] = count-1;
5333       }
5334     }
5335     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5336     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5337     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5338     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5339     off = 0;
5340     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5341     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5342     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5343     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5344     /* In case the matrix was already created but the user wants to recreate it */
5345     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5346     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5347     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5348     ierr = ISDestroy(&map);CHKERRQ(ierr);
5349     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5350   } else if (reuse == MAT_REUSE_MATRIX) {
5351     /* If the matrix was already created, we simply update the values using the SF objects
5352      * that were attached to the matrix earlier.
5353      *  */
5354     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5355     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5356     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5357     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5358     /* Update values in place */
5359     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5360     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5361     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5362     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5363   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5364   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5365   PetscFunctionReturn(0);
5366 }
5367 
5368 /*@C
5369     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5370 
5371     Collective on Mat
5372 
5373    Input Parameters:
5374 +    A,B - the matrices in mpiaij format
5375 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5376 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5377 
5378    Output Parameter:
5379 +    rowb, colb - index sets of rows and columns of B to extract
5380 -    B_seq - the sequential matrix generated
5381 
5382     Level: developer
5383 
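   Example usage (a minimal sketch; A and B are assumed to be assembled MATMPIAIJ matrices with compatible local layouts):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... later, reuse the index sets after the values of B change ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
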
5384 @*/
5385 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5386 {
5387   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5388   PetscErrorCode ierr;
5389   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5390   IS             isrowb,iscolb;
5391   Mat            *bseq=NULL;
5392 
5393   PetscFunctionBegin;
5394   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5395     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5396   }
5397   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5398 
5399   if (scall == MAT_INITIAL_MATRIX) {
5400     start = A->cmap->rstart;
5401     cmap  = a->garray;
5402     nzA   = a->A->cmap->n;
5403     nzB   = a->B->cmap->n;
5404     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5405     ncols = 0;
5406     for (i=0; i<nzB; i++) {  /* row < local row index */
5407       if (cmap[i] < start) idx[ncols++] = cmap[i];
5408       else break;
5409     }
5410     imark = i;
5411     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5412     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5413     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5414     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5415   } else {
5416     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5417     isrowb  = *rowb; iscolb = *colb;
5418     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5419     bseq[0] = *B_seq;
5420   }
5421   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5422   *B_seq = bseq[0];
5423   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5424   if (!rowb) {
5425     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5426   } else {
5427     *rowb = isrowb;
5428   }
5429   if (!colb) {
5430     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5431   } else {
5432     *colb = iscolb;
5433   }
5434   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5435   PetscFunctionReturn(0);
5436 }
5437 
5438 /*
5439     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5440     of the OFF-DIAGONAL portion of the local A
5441 
5442     Collective on Mat
5443 
5444    Input Parameters:
5445 +    A,B - the matrices in mpiaij format
5446 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5447 
5448    Output Parameter:
5449 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5450 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5451 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5452 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5453 
5454     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5455      for this matrix. This is not desirable.
5456 
5457     Level: developer
5458 
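    Example usage (a minimal sketch; A and B are assumed to be assembled MATMPIAIJ matrices with compatible local layouts):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... later, after the values of B change but not its nonzero pattern ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
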
5459 */
5460 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5461 {
5462   PetscErrorCode         ierr;
5463   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5464   Mat_SeqAIJ             *b_oth;
5465   VecScatter             ctx;
5466   MPI_Comm               comm;
5467   const PetscMPIInt      *rprocs,*sprocs;
5468   const PetscInt         *srow,*rstarts,*sstarts;
5469   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5470   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5471   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5472   MPI_Request            *rwaits = NULL,*swaits = NULL;
5473   MPI_Status             rstatus;
5474   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5475 
5476   PetscFunctionBegin;
5477   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5478   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5479 
5480   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5481     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5482   }
5483   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5484   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5485 
5486   if (size == 1) {
5487     startsj_s = NULL;
5488     bufa_ptr  = NULL;
5489     *B_oth    = NULL;
5490     PetscFunctionReturn(0);
5491   }
5492 
5493   ctx = a->Mvctx;
5494   tag = ((PetscObject)ctx)->tag;
5495 
5496   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5497   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5498   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5499   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5500   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5501   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5502   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5503 
5504   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5505   if (scall == MAT_INITIAL_MATRIX) {
5506     /* i-array */
5507     /*---------*/
5508     /*  post receives */
5509     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5510     for (i=0; i<nrecvs; i++) {
5511       rowlen = rvalues + rstarts[i]*rbs;
5512       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5513       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5514     }
5515 
5516     /* pack the outgoing message */
5517     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5518 
5519     sstartsj[0] = 0;
5520     rstartsj[0] = 0;
5521     len         = 0; /* total length of j or a array to be sent */
5522     if (nsends) {
5523       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5524       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5525     }
5526     for (i=0; i<nsends; i++) {
5527       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5528       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5529       for (j=0; j<nrows; j++) {
5530         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5531         for (l=0; l<sbs; l++) {
5532           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5533 
5534           rowlen[j*sbs+l] = ncols;
5535 
5536           len += ncols;
5537           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5538         }
5539         k++;
5540       }
5541       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5542 
5543       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5544     }
5545     /* recvs and sends of i-array are completed */
5546     i = nrecvs;
5547     while (i--) {
5548       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5549     }
5550     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5551     ierr = PetscFree(svalues);CHKERRQ(ierr);
5552 
5553     /* allocate buffers for sending j and a arrays */
5554     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5555     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5556 
5557     /* create i-array of B_oth */
5558     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5559 
5560     b_othi[0] = 0;
5561     len       = 0; /* total length of j or a array to be received */
5562     k         = 0;
5563     for (i=0; i<nrecvs; i++) {
5564       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5565       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5566       for (j=0; j<nrows; j++) {
5567         b_othi[k+1] = b_othi[k] + rowlen[j];
5568         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5569         k++;
5570       }
5571       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5572     }
5573     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5574 
5575     /* allocate space for the j and a arrays of B_oth */
5576     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5577     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5578 
5579     /* j-array */
5580     /*---------*/
5581     /*  post receives of j-array */
5582     for (i=0; i<nrecvs; i++) {
5583       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5584       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5585     }
5586 
5587     /* pack the outgoing message j-array */
5588     if (nsends) k = sstarts[0];
5589     for (i=0; i<nsends; i++) {
5590       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5591       bufJ  = bufj+sstartsj[i];
5592       for (j=0; j<nrows; j++) {
5593         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5594         for (ll=0; ll<sbs; ll++) {
5595           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5596           for (l=0; l<ncols; l++) {
5597             *bufJ++ = cols[l];
5598           }
5599           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5600         }
5601       }
5602       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5603     }
5604 
5605     /* recvs and sends of j-array are completed */
5606     i = nrecvs;
5607     while (i--) {
5608       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5609     }
5610     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5611   } else if (scall == MAT_REUSE_MATRIX) {
5612     sstartsj = *startsj_s;
5613     rstartsj = *startsj_r;
5614     bufa     = *bufa_ptr;
5615     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5616     b_otha   = b_oth->a;
5617   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5618 
5619   /* a-array */
5620   /*---------*/
5621   /*  post receives of a-array */
5622   for (i=0; i<nrecvs; i++) {
5623     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5624     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5625   }
5626 
5627   /* pack the outgoing message a-array */
5628   if (nsends) k = sstarts[0];
5629   for (i=0; i<nsends; i++) {
5630     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5631     bufA  = bufa+sstartsj[i];
5632     for (j=0; j<nrows; j++) {
5633       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5634       for (ll=0; ll<sbs; ll++) {
5635         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5636         for (l=0; l<ncols; l++) {
5637           *bufA++ = vals[l];
5638         }
5639         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5640       }
5641     }
5642     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5643   }
5644   /* recvs and sends of a-array are completed */
5645   i = nrecvs;
5646   while (i--) {
5647     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5648   }
5649   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5650   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5651 
5652   if (scall == MAT_INITIAL_MATRIX) {
5653     /* put together the new matrix */
5654     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5655 
5656     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5657     /* Since these are PETSc arrays, change flags to free them as necessary. */
5658     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5659     b_oth->free_a  = PETSC_TRUE;
5660     b_oth->free_ij = PETSC_TRUE;
5661     b_oth->nonew   = 0;
5662 
5663     ierr = PetscFree(bufj);CHKERRQ(ierr);
5664     if (!startsj_s || !bufa_ptr) {
5665       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5666       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5667     } else {
5668       *startsj_s = sstartsj;
5669       *startsj_r = rstartsj;
5670       *bufa_ptr  = bufa;
5671     }
5672   }
5673 
5674   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5675   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5676   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5677   PetscFunctionReturn(0);
5678 }
5679 
5680 /*@C
5681   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5682 
5683   Not Collective
5684 
5685   Input Parameters:
5686 . A - The matrix in mpiaij format
5687 
5688   Output Parameter:
5689 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5690 . colmap - A map from global column index to local index into lvec
5691 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5692 
5693   Level: developer
5694 
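  Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix; the returned objects are borrowed references owned by A and must not be destroyed by the caller):
.vb
  Vec        lvec;
  VecScatter sct;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);
.ve
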
5695 @*/
5696 #if defined(PETSC_USE_CTABLE)
5697 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5698 #else
5699 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5700 #endif
5701 {
5702   Mat_MPIAIJ *a;
5703 
5704   PetscFunctionBegin;
5705   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5706   PetscValidPointer(lvec, 2);
5707   PetscValidPointer(colmap, 3);
5708   PetscValidPointer(multScatter, 4);
5709   a = (Mat_MPIAIJ*) A->data;
5710   if (lvec) *lvec = a->lvec;
5711   if (colmap) *colmap = a->colmap;
5712   if (multScatter) *multScatter = a->Mvctx;
5713   PetscFunctionReturn(0);
5714 }
5715 
5716 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5717 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5718 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5719 #if defined(PETSC_HAVE_MKL_SPARSE)
5720 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5721 #endif
5722 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5723 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5724 #if defined(PETSC_HAVE_ELEMENTAL)
5725 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5726 #endif
5727 #if defined(PETSC_HAVE_HYPRE)
5728 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5729 #endif
5730 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5731 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5732 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5733 
5734 /*
5735     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5736 
5737                n                       p                          p
5738         (              )       (              )         (                  )
5739       m (      A       )  *  n (       B      )   =   m (         C        )
5740         (              )       (              )         (                  )
5741 
5742 */
5743 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5744 {
5745   PetscErrorCode ierr;
5746   Mat            At,Bt,Ct;
5747 
5748   PetscFunctionBegin;
5749   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5750   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5751   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5752   ierr = MatDestroy(&At);CHKERRQ(ierr);
5753   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5754   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5755   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5756   PetscFunctionReturn(0);
5757 }
5758 
5759 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5760 {
5761   PetscErrorCode ierr;
5762   PetscBool      cisdense;
5763 
5764   PetscFunctionBegin;
5765   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5766   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5767   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5768   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5769   if (!cisdense) {
5770     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5771   }
5772   ierr = MatSetUp(C);CHKERRQ(ierr);
5773 
5774   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5775   PetscFunctionReturn(0);
5776 }
5777 
5778 /* ----------------------------------------------------------------*/
5779 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5780 {
5781   Mat_Product *product = C->product;
5782   Mat         A = product->A,B=product->B;
5783 
5784   PetscFunctionBegin;
5785   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5786     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5787 
5788   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5789   C->ops->productsymbolic = MatProductSymbolic_AB;
5790   PetscFunctionReturn(0);
5791 }
5792 
5793 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5794 {
5795   PetscErrorCode ierr;
5796   Mat_Product    *product = C->product;
5797 
5798   PetscFunctionBegin;
5799   if (product->type == MATPRODUCT_AB) {
5800     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5801   }
5802   PetscFunctionReturn(0);
5803 }
5804 /* ----------------------------------------------------------------*/
5805 
5806 /*MC
5807    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5808 
5809    Options Database Keys:
5810 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5811 
5812    Level: beginner
5813 
5814    Notes:
5815     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5816     in this case the values associated with the rows and columns one passes in are set to zero
5817     in the matrix.
5818 
5819     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5820     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
5821 
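    Example usage (a minimal sketch of creating a parallel AIJ matrix; nlocal and N are placeholders chosen by the caller):
.vb
    Mat A;
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,nlocal,nlocal,N,N);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
    ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
.ve
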
5822 .seealso: MatCreateAIJ()
5823 M*/
5824 
5825 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5826 {
5827   Mat_MPIAIJ     *b;
5828   PetscErrorCode ierr;
5829   PetscMPIInt    size;
5830 
5831   PetscFunctionBegin;
5832   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5833 
5834   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5835   B->data       = (void*)b;
5836   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5837   B->assembled  = PETSC_FALSE;
5838   B->insertmode = NOT_SET_VALUES;
5839   b->size       = size;
5840 
5841   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5842 
5843   /* build cache for off array entries formed */
5844   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5845 
5846   b->donotstash  = PETSC_FALSE;
5847   b->colmap      = 0;
5848   b->garray      = 0;
5849   b->roworiented = PETSC_TRUE;
5850 
5851   /* stuff used for matrix vector multiply */
5852   b->lvec  = NULL;
5853   b->Mvctx = NULL;
5854 
5855   /* stuff for MatGetRow() */
5856   b->rowindices   = 0;
5857   b->rowvalues    = 0;
5858   b->getrowactive = PETSC_FALSE;
5859 
5860   /* flexible pointer used in CUSP/CUSPARSE classes */
5861   b->spptr = NULL;
5862 
5863   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5864   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5865   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5866   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5867   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5868   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5869   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5870   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5871   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5872   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5873 #if defined(PETSC_HAVE_MKL_SPARSE)
5874   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5875 #endif
5876   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5877   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5878   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5879 #if defined(PETSC_HAVE_ELEMENTAL)
5880   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5881 #endif
5882   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5884 #if defined(PETSC_HAVE_HYPRE)
5885   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5887 #endif
5888   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5889   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5890   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5891   PetscFunctionReturn(0);
5892 }
5893 
5894 /*@C
5895      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5896          and "off-diagonal" part of the matrix in CSR format.
5897 
5898    Collective
5899 
5900    Input Parameters:
5901 +  comm - MPI communicator
5902 .  m - number of local rows (Cannot be PETSC_DECIDE)
5903 .  n - This value should be the same as the local size used in creating the
5904        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5905        calculated if N is given). For square matrices n is almost always m.
5906 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5907 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5908 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5909 .   j - column indices
5910 .   a - matrix values
5911 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5912 .   oj - column indices
5913 -   oa - matrix values
5914 
5915    Output Parameter:
5916 .   mat - the matrix
5917 
5918    Level: advanced
5919 
5920    Notes:
5921        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5922        must free the arrays once the matrix has been destroyed and not before.
5923 
5924        The i and j indices are 0 based
5925 
5926        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5927 
5928        This sets local rows and cannot be used to set off-processor values.
5929 
5930        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5931        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5932        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5933        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5934        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5935        communication if it is known that only local entries will be set.
5936 
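   Example usage (a minimal sketch; m, n, i, j, a, oi, oj, and oa are placeholders for the caller-provided local sizes and split CSR arrays, which must remain valid until the matrix is destroyed):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A; the arrays may be freed only after MatDestroy(&A) ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
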
5937 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5938           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5939 @*/
5940 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5941 {
5942   PetscErrorCode ierr;
5943   Mat_MPIAIJ     *maij;
5944 
5945   PetscFunctionBegin;
5946   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5947   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5948   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5949   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5950   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5951   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5952   maij = (Mat_MPIAIJ*) (*mat)->data;
5953 
5954   (*mat)->preallocated = PETSC_TRUE;
5955 
5956   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5957   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5958 
5959   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5960   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5961 
5962   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5963   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5964   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5965   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5966 
5967   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5968   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5969   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5970   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5971   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5972   PetscFunctionReturn(0);
5973 }
5974 
5975 /*
5976     Special version for direct calls from Fortran
5977 */
5978 #include <petsc/private/fortranimpl.h>
5979 
5980 /* Change these macros so they can be used in a void function */
5981 #undef CHKERRQ
5982 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5983 #undef SETERRQ2
5984 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5985 #undef SETERRQ3
5986 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5987 #undef SETERRQ
5988 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5989 
5990 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5991 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5992 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5993 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5994 #else
5995 #endif
5996 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5997 {
5998   Mat            mat  = *mmat;
5999   PetscInt       m    = *mm, n = *mn;
6000   InsertMode     addv = *maddv;
6001   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6002   PetscScalar    value;
6003   PetscErrorCode ierr;
6004 
6005   MatCheckPreallocated(mat,1);
6006   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6007   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6008   {
6009     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6010     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6011     PetscBool roworiented = aij->roworiented;
6012 
6013     /* Some Variables required in the macro */
6014     Mat        A                    = aij->A;
6015     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6016     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6017     MatScalar  *aa                  = a->a;
6018     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6019     Mat        B                    = aij->B;
6020     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6021     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6022     MatScalar  *ba                  = b->a;
6023     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6024      * cannot use "#if defined" inside a macro. */
6025     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6026 
6027     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6028     PetscInt  nonew = a->nonew;
6029     MatScalar *ap1,*ap2;
6030 
6031     PetscFunctionBegin;
6032     for (i=0; i<m; i++) {
6033       if (im[i] < 0) continue;
6034       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6035       if (im[i] >= rstart && im[i] < rend) {
6036         row      = im[i] - rstart;
6037         lastcol1 = -1;
6038         rp1      = aj + ai[row];
6039         ap1      = aa + ai[row];
6040         rmax1    = aimax[row];
6041         nrow1    = ailen[row];
6042         low1     = 0;
6043         high1    = nrow1;
6044         lastcol2 = -1;
6045         rp2      = bj + bi[row];
6046         ap2      = ba + bi[row];
6047         rmax2    = bimax[row];
6048         nrow2    = bilen[row];
6049         low2     = 0;
6050         high2    = nrow2;
6051 
6052         for (j=0; j<n; j++) {
6053           if (roworiented) value = v[i*n+j];
6054           else value = v[i+j*m];
6055           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6056           if (in[j] >= cstart && in[j] < cend) {
6057             col = in[j] - cstart;
6058             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6059 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6060             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6061 #endif
6062           } else if (in[j] < 0) continue;
6063           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6064             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6065             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6066           } else {
6067             if (mat->was_assembled) {
6068               if (!aij->colmap) {
6069                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6070               }
6071 #if defined(PETSC_USE_CTABLE)
6072               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6073               col--;
6074 #else
6075               col = aij->colmap[in[j]] - 1;
6076 #endif
6077               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6078                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6079                 col  =  in[j];
6080                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6081                 B        = aij->B;
6082                 b        = (Mat_SeqAIJ*)B->data;
6083                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6084                 rp2      = bj + bi[row];
6085                 ap2      = ba + bi[row];
6086                 rmax2    = bimax[row];
6087                 nrow2    = bilen[row];
6088                 low2     = 0;
6089                 high2    = nrow2;
6090                 bm       = aij->B->rmap->n;
6091                 ba       = b->a;
6092                 inserted = PETSC_FALSE;
6093               }
6094             } else col = in[j];
6095             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6096 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6097             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6098 #endif
6099           }
6100         }
6101       } else if (!aij->donotstash) {
6102         if (roworiented) {
6103           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6104         } else {
6105           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6106         }
6107       }
6108     }
6109   }
6110   PetscFunctionReturnVoid();
6111 }
6112