xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2bcef1f22680dd67181dd710ef3d41bc6d95a252)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the matrix also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
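
/*
   A minimal usage sketch of the advice above, assuming a square matrix of global size N and
   placeholder preallocation counts d_nz and o_nz; calling both preallocation routines lets the
   same code run on one or many MPI processes:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/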
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
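
/*
   A minimal sketch of selecting this type, assuming an existing matrix A and placeholder
   preallocation counts d_nz and o_nz (alternatively use -mat_type aijcrl with MatSetFromOptions()):

     ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/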
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
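/*
   A sketch of the intended call pattern, assuming a placeholder communicator comm, a square
   SeqAIJ matrix gmat that is valid on rank 0, and a placeholder local row count m:

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/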
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the numbers of diagonal and off-diagonal entries */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the numbers of diagonal and off-diagonal entries */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
424 a slightly higher hash table cost; without it, it is not scalable (each processor
425 has an order N integer array) but is fast to access.
426 */
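/*
   A sketch of how the mapping built below is consulted (mirroring the lookups in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() further down); gcol is a placeholder global
   column index and col receives the local off-diagonal column index, or -1 if gcol is absent:

  #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
  #else
     col = aij->colmap[gcol] - 1;
  #endif
*/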
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* The variable below is only used in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
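/*
   For illustration (placeholder numbers): with cstart = 2 and cend = 4, a row whose global columns
   in mat_j are {0, 2, 3, 5} is split so that the diagonal block receives the local column indices
   {0, 1} (columns 2 and 3 shifted by cstart) and the off-diagonal block receives the global
   column indices {0, 5}, giving ilen entries of 2 for both blocks.
*/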
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart
712     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n; ) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled; if so we must
857      also disassemble ourselves, in order that we may reassemble. */
858   /*
859      if the nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled and thus we can skip this step
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = 0;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layouts don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1162 {
1163   MPI_Comm       comm;
1164   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1165   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1166   IS             Me,Notme;
1167   PetscErrorCode ierr;
1168   PetscInt       M,N,first,last,*notme,i;
1169   PetscBool      lf;
1170   PetscMPIInt    size;
1171 
1172   PetscFunctionBegin;
1173   /* Easy test: symmetric diagonal block */
1174   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1175   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1176   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1177   if (!*f) PetscFunctionReturn(0);
1178   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size == 1) PetscFunctionReturn(0);
1181 
1182   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1183   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1184   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1185   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1186   for (i=0; i<first; i++) notme[i] = i;
1187   for (i=last; i<M; i++) notme[i-last+first] = i;
1188   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1189   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1190   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1191   Aoff = Aoffs[0];
1192   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1193   Boff = Boffs[0];
1194   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1195   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1197   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1199   ierr = PetscFree(notme);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261 #if defined(PETSC_USE_LOG)
1262   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1263 #endif
1264   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1266   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1268 #if defined(PETSC_USE_CTABLE)
1269   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1270 #else
1271   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1272 #endif
1273   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1274   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1275   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1276   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1277   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1278   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1279   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1280 
1281   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1282   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1283 
1284   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_HYPRE)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1300 #endif
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1308 {
1309   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1310   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1311   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1312   const PetscInt    *garray = aij->garray;
1313   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1314   PetscInt          *rowlens;
1315   PetscInt          *colidxs;
1316   PetscScalar       *matvals;
1317   PetscErrorCode    ierr;
1318 
1319   PetscFunctionBegin;
1320   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1321 
1322   M  = mat->rmap->N;
1323   N  = mat->cmap->N;
1324   m  = mat->rmap->n;
1325   rs = mat->rmap->rstart;
1326   cs = mat->cmap->rstart;
1327   nz = A->nz + B->nz;
1328 
1329   /* write matrix header */
1330   header[0] = MAT_FILE_CLASSID;
1331   header[1] = M; header[2] = N; header[3] = nz;
1332   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1334 
1335   /* fill in and store row lengths  */
1336   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1337   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1338   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1339   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1340 
1341   /* fill in and store column indices */
1342   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1343   for (cnt=0, i=0; i<m; i++) {
1344     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1345       if (garray[B->j[jb]] > cs) break;
1346       colidxs[cnt++] = garray[B->j[jb]];
1347     }
1348     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1349       colidxs[cnt++] = A->j[ja] + cs;
1350     for (; jb<B->i[i+1]; jb++)
1351       colidxs[cnt++] = garray[B->j[jb]];
1352   }
1353   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1354   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1355   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1356 
1357   /* fill in and store nonzero values */
1358   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1359   for (cnt=0, i=0; i<m; i++) {
1360     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1361       if (garray[B->j[jb]] > cs) break;
1362       matvals[cnt++] = B->a[jb];
1363     }
1364     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1365       matvals[cnt++] = A->a[ja];
1366     for (; jb<B->i[i+1]; jb++)
1367       matvals[cnt++] = B->a[jb];
1368   }
1369   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1370   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1371   ierr = PetscFree(matvals);CHKERRQ(ierr);
1372 
1373   /* write block size option to the viewer's .info file */
1374   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
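
/*
   Illustrative usage sketch (not code from this library; the file name is an example). The
   writer above stores a header (classid, M, N, global nonzero count), then all row lengths,
   then all global column indices, then all values, gathered in global row order, with each
   row's off-diagonal entries split around the diagonal block exactly as in the loops above.

     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);
     MatView(A,viewer);                       // dispatches here for a parallel MATMPIAIJ matrix
     PetscViewerDestroy(&viewer);
*/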
1377 
1378 #include <petscdraw.h>
1379 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1380 {
1381   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1382   PetscErrorCode    ierr;
1383   PetscMPIInt       rank = aij->rank,size = aij->size;
1384   PetscBool         isdraw,iascii,isbinary;
1385   PetscViewer       sviewer;
1386   PetscViewerFormat format;
1387 
1388   PetscFunctionBegin;
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1392   if (iascii) {
1393     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1394     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1395       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1396       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1397       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398       for (i=0; i<(PetscInt)size; i++) {
1399         nmax = PetscMax(nmax,nz[i]);
1400         nmin = PetscMin(nmin,nz[i]);
1401         navg += nz[i];
1402       }
1403       ierr = PetscFree(nz);CHKERRQ(ierr);
1404       navg = navg/size;
1405       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1406       PetscFunctionReturn(0);
1407     }
1408     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1409     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1410       MatInfo   info;
1411       PetscBool inodes;
1412 
1413       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1414       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1415       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1416       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1417       if (!inodes) {
1418         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1419                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1420       } else {
1421         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1422                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1423       }
1424       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1425       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1426       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1427       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1428       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1429       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1431       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1432       PetscFunctionReturn(0);
1433     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1434       PetscInt inodecount,inodelimit,*inodes;
1435       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1436       if (inodes) {
1437         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1438       } else {
1439         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1440       }
1441       PetscFunctionReturn(0);
1442     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1443       PetscFunctionReturn(0);
1444     }
1445   } else if (isbinary) {
1446     if (size == 1) {
1447       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1448       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1449     } else {
1450       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1451     }
1452     PetscFunctionReturn(0);
1453   } else if (iascii && size == 1) {
1454     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1455     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1456     PetscFunctionReturn(0);
1457   } else if (isdraw) {
1458     PetscDraw draw;
1459     PetscBool isnull;
1460     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1461     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1462     if (isnull) PetscFunctionReturn(0);
1463   }
1464 
1465   { /* assemble the entire matrix onto first processor */
1466     Mat A = NULL, Av;
1467     IS  isrow,iscol;
1468 
1469     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1470     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1471     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1472     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1473 /* The commented-out code below uses MatCreateSubMatrices() instead */
1474 /*
1475     Mat *AA, A = NULL, Av;
1476     IS  isrow,iscol;
1477 
1478     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1479     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1480     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1481     if (!rank) {
1482        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1483        A    = AA[0];
1484        Av   = AA[0];
1485     }
1486     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1487 */
1488     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1489     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1490     /*
1491        Every process has to participate in the viewing call since the graphics waits are
1492        synchronized across all processes that share the PetscDraw object
1493     */
1494     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1495     if (!rank) {
1496       if (((PetscObject)mat)->name) {
1497         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       }
1499       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1500     }
1501     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1502     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1503     ierr = MatDestroy(&A);CHKERRQ(ierr);
1504   }
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1509 {
1510   PetscErrorCode ierr;
1511   PetscBool      iascii,isdraw,issocket,isbinary;
1512 
1513   PetscFunctionBegin;
1514   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1515   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1516   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1518   if (iascii || isdraw || isbinary || issocket) {
1519     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1525 {
1526   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1527   PetscErrorCode ierr;
1528   Vec            bb1 = 0;
1529   PetscBool      hasop;
1530 
1531   PetscFunctionBegin;
1532   if (flag == SOR_APPLY_UPPER) {
1533     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1534     PetscFunctionReturn(0);
1535   }
1536 
1537   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* bb1 is needed unless a single sweep with a zero initial guess is requested */
1538     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1539   }
1540 
1541   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1542     if (flag & SOR_ZERO_INITIAL_GUESS) {
1543       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1544       its--;
1545     }
1546 
1547     while (its--) {
1548       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1549       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1550 
1551       /* update rhs: bb1 = bb - B*x */
1552       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1553       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1554 
1555       /* local sweep */
1556       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1557     }
1558   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1559     if (flag & SOR_ZERO_INITIAL_GUESS) {
1560       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1561       its--;
1562     }
1563     while (its--) {
1564       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1565       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1566 
1567       /* update rhs: bb1 = bb - B*x */
1568       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1569       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1570 
1571       /* local sweep */
1572       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1573     }
1574   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1575     if (flag & SOR_ZERO_INITIAL_GUESS) {
1576       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1577       its--;
1578     }
1579     while (its--) {
1580       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1581       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1582 
1583       /* update rhs: bb1 = bb - B*x */
1584       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1585       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1586 
1587       /* local sweep */
1588       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1589     }
1590   } else if (flag & SOR_EISENSTAT) {
1591     Vec xx1;
1592 
1593     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1594     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1595 
1596     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1597     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1598     if (!mat->diag) {
1599       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1600       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1601     }
1602     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1603     if (hasop) {
1604       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1605     } else {
1606       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1607     }
1608     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1609 
1610     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1611 
1612     /* local sweep */
1613     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1614     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1615     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1616   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1617 
1618   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1619 
1620   matin->factorerrortype = mat->A->factorerrortype;
1621   PetscFunctionReturn(0);
1622 }
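
/*
   Illustrative sketch of how the routine above is typically reached (the solver setup below
   is an assumption of the sketch). Only processor-local sweeps are supported: each outer
   iteration scatters the current solution, forms bb1 = bb - B*x with the off-diagonal block,
   and runs a sequential sweep on the diagonal block.

     KSP ksp;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A);
     KSPSetFromOptions(ksp);                  // e.g. -ksp_type richardson -pc_type sor
     KSPSolve(ksp,b,x);                       // PCSOR calls MatSOR() on A each iteration
*/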
1623 
1624 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1625 {
1626   Mat            aA,aB,Aperm;
1627   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1628   PetscScalar    *aa,*ba;
1629   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1630   PetscSF        rowsf,sf;
1631   IS             parcolp = NULL;
1632   PetscBool      done;
1633   PetscErrorCode ierr;
1634 
1635   PetscFunctionBegin;
1636   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1637   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1638   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1639   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1640 
1641   /* Invert row permutation to find out where my rows should go */
1642   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1643   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1644   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1645   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1646   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1647   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1648 
1649   /* Invert column permutation to find out where my columns should go */
1650   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1651   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1652   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1653   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1654   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1655   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1656   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1657 
1658   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1659   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1660   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1661 
1662   /* Find out where my gcols should go */
1663   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1664   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1665   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1666   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1667   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1668   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1669   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1670   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1671 
1672   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1673   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1674   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1675   for (i=0; i<m; i++) {
1676     PetscInt    row = rdest[i];
1677     PetscMPIInt rowner;
1678     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1679     for (j=ai[i]; j<ai[i+1]; j++) {
1680       PetscInt    col = cdest[aj[j]];
1681       PetscMPIInt cowner;
1682       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1683       if (rowner == cowner) dnnz[i]++;
1684       else onnz[i]++;
1685     }
1686     for (j=bi[i]; j<bi[i+1]; j++) {
1687       PetscInt    col = gcdest[bj[j]];
1688       PetscMPIInt cowner;
1689       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1690       if (rowner == cowner) dnnz[i]++;
1691       else onnz[i]++;
1692     }
1693   }
1694   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1695   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1696   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1697   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1698   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1699 
1700   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1701   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1702   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1703   for (i=0; i<m; i++) {
1704     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1705     PetscInt j0,rowlen;
1706     rowlen = ai[i+1] - ai[i];
1707     for (j0=j=0; j<rowlen; j0=j) { /* rowlen may exceed m, the length of the repurposed arrays, so insert in batches of at most m */
1708       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1709       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1710     }
1711     rowlen = bi[i+1] - bi[i];
1712     for (j0=j=0; j<rowlen; j0=j) {
1713       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1714       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1715     }
1716   }
1717   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1718   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1719   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1720   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1721   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1722   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1723   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1724   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1725   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1726   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1727   *B = Aperm;
1728   PetscFunctionReturn(0);
1729 }
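
/*
   Minimal usage sketch (not code from this library): rowp and colp are parallel index sets
   giving the new global position of each locally owned row and column. The identity
   permutation below is chosen only to keep the sketch self-contained.

     IS       rowp,colp;
     Mat      B;
     PetscInt rstart,rend,cstart,cend;
     MatGetOwnershipRange(A,&rstart,&rend);
     MatGetOwnershipRangeColumn(A,&cstart,&cend);
     ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);
     ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);
     MatPermute(A,rowp,colp,&B);              // B built with the PetscSF-based counting above
     ISDestroy(&rowp); ISDestroy(&colp);
*/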
1730 
1731 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1732 {
1733   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1734   PetscErrorCode ierr;
1735 
1736   PetscFunctionBegin;
1737   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1738   if (ghosts) *ghosts = aij->garray;
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1743 {
1744   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1745   Mat            A    = mat->A,B = mat->B;
1746   PetscErrorCode ierr;
1747   PetscLogDouble isend[5],irecv[5];
1748 
1749   PetscFunctionBegin;
1750   info->block_size = 1.0;
1751   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1752 
1753   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1754   isend[3] = info->memory;  isend[4] = info->mallocs;
1755 
1756   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1757 
1758   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1759   isend[3] += info->memory;  isend[4] += info->mallocs;
1760   if (flag == MAT_LOCAL) {
1761     info->nz_used      = isend[0];
1762     info->nz_allocated = isend[1];
1763     info->nz_unneeded  = isend[2];
1764     info->memory       = isend[3];
1765     info->mallocs      = isend[4];
1766   } else if (flag == MAT_GLOBAL_MAX) {
1767     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1768 
1769     info->nz_used      = irecv[0];
1770     info->nz_allocated = irecv[1];
1771     info->nz_unneeded  = irecv[2];
1772     info->memory       = irecv[3];
1773     info->mallocs      = irecv[4];
1774   } else if (flag == MAT_GLOBAL_SUM) {
1775     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1776 
1777     info->nz_used      = irecv[0];
1778     info->nz_allocated = irecv[1];
1779     info->nz_unneeded  = irecv[2];
1780     info->memory       = irecv[3];
1781     info->mallocs      = irecv[4];
1782   }
1783   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1784   info->fill_ratio_needed = 0;
1785   info->factor_mallocs    = 0;
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1790 {
1791   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1792   PetscErrorCode ierr;
1793 
1794   PetscFunctionBegin;
1795   switch (op) {
1796   case MAT_NEW_NONZERO_LOCATIONS:
1797   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1798   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1799   case MAT_KEEP_NONZERO_PATTERN:
1800   case MAT_NEW_NONZERO_LOCATION_ERR:
1801   case MAT_USE_INODES:
1802   case MAT_IGNORE_ZERO_ENTRIES:
1803     MatCheckPreallocated(A,1);
1804     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1805     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1806     break;
1807   case MAT_ROW_ORIENTED:
1808     MatCheckPreallocated(A,1);
1809     a->roworiented = flg;
1810 
1811     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1812     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1813     break;
1814   case MAT_NEW_DIAGONALS:
1815   case MAT_SORTED_FULL:
1816     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1817     break;
1818   case MAT_IGNORE_OFF_PROC_ENTRIES:
1819     a->donotstash = flg;
1820     break;
1821   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1822   case MAT_SPD:
1823   case MAT_SYMMETRIC:
1824   case MAT_STRUCTURALLY_SYMMETRIC:
1825   case MAT_HERMITIAN:
1826   case MAT_SYMMETRY_ETERNAL:
1827     break;
1828   case MAT_SUBMAT_SINGLEIS:
1829     A->submat_singleis = flg;
1830     break;
1831   case MAT_STRUCTURE_ONLY:
1832     /* The option is handled directly by MatSetOption() */
1833     break;
1834   default:
1835     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1836   }
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1841 {
1842   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1843   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1844   PetscErrorCode ierr;
1845   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1846   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1847   PetscInt       *cmap,*idx_p;
1848 
1849   PetscFunctionBegin;
1850   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1851   mat->getrowactive = PETSC_TRUE;
1852 
1853   if (!mat->rowvalues && (idx || v)) {
1854     /*
1855         allocate enough space to hold information from the longest row.
1856     */
1857     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1858     PetscInt   max = 1,tmp;
1859     for (i=0; i<matin->rmap->n; i++) {
1860       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1861       if (max < tmp) max = tmp;
1862     }
1863     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1864   }
1865 
1866   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1867   lrow = row - rstart;
1868 
1869   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1870   if (!v)   {pvA = 0; pvB = 0;}
1871   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1872   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1873   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1874   nztot = nzA + nzB;
1875 
1876   cmap = mat->garray;
1877   if (v  || idx) {
1878     if (nztot) {
1879       /* Sort by increasing column numbers, assuming A and B already sorted */
1880       PetscInt imark = -1;
1881       if (v) {
1882         *v = v_p = mat->rowvalues;
1883         for (i=0; i<nzB; i++) {
1884           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1885           else break;
1886         }
1887         imark = i;
1888         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1889         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1890       }
1891       if (idx) {
1892         *idx = idx_p = mat->rowindices;
1893         if (imark > -1) {
1894           for (i=0; i<imark; i++) {
1895             idx_p[i] = cmap[cworkB[i]];
1896           }
1897         } else {
1898           for (i=0; i<nzB; i++) {
1899             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1900             else break;
1901           }
1902           imark = i;
1903         }
1904         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1905         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1906       }
1907     } else {
1908       if (idx) *idx = 0;
1909       if (v)   *v   = 0;
1910     }
1911   }
1912   *nz  = nztot;
1913   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1914   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
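
/*
   Usage sketch for the row access implemented above (illustrative only): the diagonal and
   off-diagonal parts are merged into one row sorted by global column. Only locally owned
   rows may be requested, and every MatGetRow() must be matched by MatRestoreRow().

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&ncols,&cols,&vals);
       // ... use cols[0..ncols-1] (global column indices) and vals ...
       MatRestoreRow(A,row,&ncols,&cols,&vals);
     }
*/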
1917 
1918 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1919 {
1920   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1921 
1922   PetscFunctionBegin;
1923   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1924   aij->getrowactive = PETSC_FALSE;
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1929 {
1930   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1931   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1932   PetscErrorCode ierr;
1933   PetscInt       i,j,cstart = mat->cmap->rstart;
1934   PetscReal      sum = 0.0;
1935   MatScalar      *v;
1936 
1937   PetscFunctionBegin;
1938   if (aij->size == 1) {
1939     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1940   } else {
1941     if (type == NORM_FROBENIUS) {
1942       v = amat->a;
1943       for (i=0; i<amat->nz; i++) {
1944         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1945       }
1946       v = bmat->a;
1947       for (i=0; i<bmat->nz; i++) {
1948         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1949       }
1950       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1951       *norm = PetscSqrtReal(*norm);
1952       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1953     } else if (type == NORM_1) { /* max column norm */
1954       PetscReal *tmp,*tmp2;
1955       PetscInt  *jj,*garray = aij->garray;
1956       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1957       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1958       *norm = 0.0;
1959       v     = amat->a; jj = amat->j;
1960       for (j=0; j<amat->nz; j++) {
1961         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1962       }
1963       v = bmat->a; jj = bmat->j;
1964       for (j=0; j<bmat->nz; j++) {
1965         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1966       }
1967       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1968       for (j=0; j<mat->cmap->N; j++) {
1969         if (tmp2[j] > *norm) *norm = tmp2[j];
1970       }
1971       ierr = PetscFree(tmp);CHKERRQ(ierr);
1972       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1973       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1974     } else if (type == NORM_INFINITY) { /* max row norm */
1975       PetscReal ntemp = 0.0;
1976       for (j=0; j<aij->A->rmap->n; j++) {
1977         v   = amat->a + amat->i[j];
1978         sum = 0.0;
1979         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1980           sum += PetscAbsScalar(*v); v++;
1981         }
1982         v = bmat->a + bmat->i[j];
1983         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1984           sum += PetscAbsScalar(*v); v++;
1985         }
1986         if (sum > ntemp) ntemp = sum;
1987       }
1988       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1989       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1990     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1991   }
1992   PetscFunctionReturn(0);
1993 }
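
/*
   Brief usage sketch for the norms above (illustrative only): the Frobenius norm comes from
   local sums of squares reduced with MPI_SUM, the 1-norm from global column sums, and the
   infinity norm from local row sums reduced with MPI_MAX; NORM_2 is not supported here.

     PetscReal nrm;
     MatNorm(A,NORM_FROBENIUS,&nrm);          // also NORM_1 or NORM_INFINITY
*/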
1994 
1995 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1996 {
1997   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1998   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1999   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2000   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2001   PetscErrorCode  ierr;
2002   Mat             B,A_diag,*B_diag;
2003   const MatScalar *array;
2004 
2005   PetscFunctionBegin;
2006   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2007   ai = Aloc->i; aj = Aloc->j;
2008   bi = Bloc->i; bj = Bloc->j;
2009   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2010     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2011     PetscSFNode          *oloc;
2012     PETSC_UNUSED PetscSF sf;
2013 
2014     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2015     /* compute d_nnz for preallocation */
2016     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2017     for (i=0; i<ai[ma]; i++) {
2018       d_nnz[aj[i]]++;
2019     }
2020     /* compute local off-diagonal contributions */
2021     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2022     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2023     /* map those to global */
2024     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2025     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2026     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2027     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2028     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2029     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2030     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2031 
2032     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2033     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2034     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2035     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2036     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2037     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2038   } else {
2039     B    = *matout;
2040     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2041   }
2042 
2043   b           = (Mat_MPIAIJ*)B->data;
2044   A_diag      = a->A;
2045   B_diag      = &b->A;
2046   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2047   A_diag_ncol = A_diag->cmap->N;
2048   B_diag_ilen = sub_B_diag->ilen;
2049   B_diag_i    = sub_B_diag->i;
2050 
2051   /* Set ilen for diagonal of B */
2052   for (i=0; i<A_diag_ncol; i++) {
2053     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2054   }
2055 
2056   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2057   very quickly (without using MatSetValues()) because all writes are local. */
2058   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2059 
2060   /* copy over the B part */
2061   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2062   array = Bloc->a;
2063   row   = A->rmap->rstart;
2064   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2065   cols_tmp = cols;
2066   for (i=0; i<mb; i++) {
2067     ncol = bi[i+1]-bi[i];
2068     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2069     row++;
2070     array += ncol; cols_tmp += ncol;
2071   }
2072   ierr = PetscFree(cols);CHKERRQ(ierr);
2073 
2074   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2075   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2077     *matout = B;
2078   } else {
2079     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2080   }
2081   PetscFunctionReturn(0);
2082 }
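
/*
   Illustrative sketch for the transpose above (matrix names are assumptions): the diagonal
   block is transposed locally, while the off-diagonal entries are redistributed through
   MatSetValues() with row and column roles swapped.

     Mat At;
     MatTranspose(A,MAT_INITIAL_MATRIX,&At);  // allocate and fill A^T
     // ... after changing values of A (same nonzero pattern) ...
     MatTranspose(A,MAT_REUSE_MATRIX,&At);    // refill the previously created A^T
*/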
2083 
2084 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2085 {
2086   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2087   Mat            a    = aij->A,b = aij->B;
2088   PetscErrorCode ierr;
2089   PetscInt       s1,s2,s3;
2090 
2091   PetscFunctionBegin;
2092   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2093   if (rr) {
2094     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2095     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2096     /* Overlap communication with computation. */
2097     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2098   }
2099   if (ll) {
2100     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2101     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2102     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2103   }
2104   /* scale  the diagonal block */
2105   /* scale the diagonal block */
2106 
2107   if (rr) {
2108     /* Do a scatter end and then right scale the off-diagonal block */
2109     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2110     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2111   }
2112   PetscFunctionReturn(0);
2113 }
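
/*
   Minimal usage sketch (illustrative): the routine above overlaps the scatter of the right
   scaling vector with the left scaling of the local blocks.

     Vec l,r;
     MatCreateVecs(A,&r,&l);                  // r conforms to A's columns, l to A's rows
     VecSet(l,2.0); VecSet(r,0.5);
     MatDiagonalScale(A,l,r);                 // A <- diag(l) * A * diag(r)
*/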
2114 
2115 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2116 {
2117   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2118   PetscErrorCode ierr;
2119 
2120   PetscFunctionBegin;
2121   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2126 {
2127   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2128   Mat            a,b,c,d;
2129   PetscBool      flg;
2130   PetscErrorCode ierr;
2131 
2132   PetscFunctionBegin;
2133   a = matA->A; b = matA->B;
2134   c = matB->A; d = matB->B;
2135 
2136   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2137   if (flg) {
2138     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2139   }
2140   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2145 {
2146   PetscErrorCode ierr;
2147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2148   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2149 
2150   PetscFunctionBegin;
2151   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2152   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2153     /* Because of the column compression in the off-process part of the matrix a->B,
2154        the number of columns in a->B and b->B may differ, hence we cannot call
2155        MatCopy() directly on the two parts. If need be, a copy more efficient than
2156        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2157        then copying the submatrices */
2158     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2159   } else {
2160     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2161     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2162   }
2163   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2168 {
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 /*
2177    Computes the number of nonzeros per row needed for preallocation when X and Y
2178    have different nonzero structure.
2179 */
2180 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2181 {
2182   PetscInt       i,j,k,nzx,nzy;
2183 
2184   PetscFunctionBegin;
2185   /* Set the number of nonzeros in the new matrix */
2186   for (i=0; i<m; i++) {
2187     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2188     nzx = xi[i+1] - xi[i];
2189     nzy = yi[i+1] - yi[i];
2190     nnz[i] = 0;
2191     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2192       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2193       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2194       nnz[i]++;
2195     }
2196     for (; k<nzy; k++) nnz[i]++;
2197   }
2198   PetscFunctionReturn(0);
2199 }
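
/*
   Worked example of the merge count above (numbers are illustrative): if for one row the
   global columns of X are {0,3,7} and those of Y are {3,5}, the loop visits 0 (X only),
   3 (shared, counted once), 5 (Y only) and 7 (X only), giving nnz = 4 for that row, i.e.
   the size of the union of the two column sets.
*/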
2200 
2201 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2202 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2203 {
2204   PetscErrorCode ierr;
2205   PetscInt       m = Y->rmap->N;
2206   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2207   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2208 
2209   PetscFunctionBegin;
2210   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2215 {
2216   PetscErrorCode ierr;
2217   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2218   PetscBLASInt   bnz,one=1;
2219   Mat_SeqAIJ     *x,*y;
2220 
2221   PetscFunctionBegin;
2222   if (str == SAME_NONZERO_PATTERN) {
2223     PetscScalar alpha = a;
2224     x    = (Mat_SeqAIJ*)xx->A->data;
2225     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2226     y    = (Mat_SeqAIJ*)yy->A->data;
2227     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2228     x    = (Mat_SeqAIJ*)xx->B->data;
2229     y    = (Mat_SeqAIJ*)yy->B->data;
2230     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2231     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2232     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2233     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/MatAssemblyEnd(), so the matrix
2234        on the GPU will be updated */
2235 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2236     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2237       Y->offloadmask = PETSC_OFFLOAD_CPU;
2238     }
2239 #endif
2240   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2241     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2242   } else {
2243     Mat      B;
2244     PetscInt *nnz_d,*nnz_o;
2245     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2246     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2247     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2248     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2249     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2250     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2251     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
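
/*
   Usage sketch for the update above (illustrative): with SAME_NONZERO_PATTERN each local
   block is combined with a single BLAS axpy; with DIFFERENT_NONZERO_PATTERN a new matrix is
   preallocated from the merged patterns and Y is replaced via MatHeaderReplace().

     MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);        // Y <- Y + 2 X, fastest path
     MatAXPY(Y,-1.0,X,DIFFERENT_NONZERO_PATTERN);  // general path, reallocates Y as needed
*/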
2262 
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
2265 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2266 {
2267 #if defined(PETSC_USE_COMPLEX)
2268   PetscErrorCode ierr;
2269   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2270 
2271   PetscFunctionBegin;
2272   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2273   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2274 #else
2275   PetscFunctionBegin;
2276 #endif
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2287   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2292 {
2293   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2298   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2303 {
2304   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2305   PetscErrorCode ierr;
2306   PetscInt       i,*idxb = 0;
2307   PetscScalar    *va,*vb;
2308   Vec            vtmp;
2309 
2310   PetscFunctionBegin;
2311   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2312   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2313   if (idx) {
2314     for (i=0; i<A->rmap->n; i++) {
2315       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2316     }
2317   }
2318 
2319   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2320   if (idx) {
2321     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2322   }
2323   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2324   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2325 
2326   for (i=0; i<A->rmap->n; i++) {
2327     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2328       va[i] = vb[i];
2329       if (idx) idx[i] = a->garray[idxb[i]];
2330     }
2331   }
2332 
2333   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2334   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2335   ierr = PetscFree(idxb);CHKERRQ(ierr);
2336   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2343   PetscErrorCode ierr;
2344   PetscInt       i,*idxb = 0;
2345   PetscScalar    *va,*vb;
2346   Vec            vtmp;
2347 
2348   PetscFunctionBegin;
2349   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2350   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2351   if (idx) {
2352     for (i=0; i<A->rmap->n; i++) { /* local rows, as in MatGetRowMaxAbs_MPIAIJ() above */
2353       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2354     }
2355   }
2356 
2357   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2358   if (idx) {
2359     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2360   }
2361   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2362   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2363 
2364   for (i=0; i<A->rmap->n; i++) {
2365     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2366       va[i] = vb[i];
2367       if (idx) idx[i] = a->garray[idxb[i]];
2368     }
2369   }
2370 
2371   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2373   ierr = PetscFree(idxb);CHKERRQ(ierr);
2374   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2379 {
2380   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2381   PetscInt       n      = A->rmap->n;
2382   PetscInt       cstart = A->cmap->rstart;
2383   PetscInt       *cmap  = mat->garray;
2384   PetscInt       *diagIdx, *offdiagIdx;
2385   Vec            diagV, offdiagV;
2386   PetscScalar    *a, *diagA, *offdiagA;
2387   PetscInt       r;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2392   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2393   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2394   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2395   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2396   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2397   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2398   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2399   for (r = 0; r < n; ++r) {
2400     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2401       a[r]   = diagA[r];
2402       idx[r] = cstart + diagIdx[r];
2403     } else {
2404       a[r]   = offdiagA[r];
2405       idx[r] = cmap[offdiagIdx[r]];
2406     }
2407   }
2408   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2409   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2410   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2411   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2412   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2413   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2418 {
2419   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2420   PetscInt       n      = A->rmap->n;
2421   PetscInt       cstart = A->cmap->rstart;
2422   PetscInt       *cmap  = mat->garray;
2423   PetscInt       *diagIdx, *offdiagIdx;
2424   Vec            diagV, offdiagV;
2425   PetscScalar    *a, *diagA, *offdiagA;
2426   PetscInt       r;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2432   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2433   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2434   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2436   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2437   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2438   for (r = 0; r < n; ++r) {
2439     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2440       a[r]   = diagA[r];
2441       idx[r] = cstart + diagIdx[r];
2442     } else {
2443       a[r]   = offdiagA[r];
2444       idx[r] = cmap[offdiagIdx[r]];
2445     }
2446   }
2447   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2450   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2451   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2452   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
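
/*
   Minimal sketch for the row max/min routines above (illustrative): each local row takes the
   winner between its diagonal and off-diagonal block entries, and the returned index is the
   global column of that entry.

     Vec      rmax;
     PetscInt nloc,*loc;
     MatCreateVecs(A,NULL,&rmax);             // one entry per local row
     MatGetLocalSize(A,&nloc,NULL);
     PetscMalloc1(nloc,&loc);
     MatGetRowMax(A,rmax,loc);                // similarly MatGetRowMin(), MatGetRowMaxAbs(), MatGetRowMinAbs()
     PetscFree(loc); VecDestroy(&rmax);
*/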
2455 
2456 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2457 {
2458   PetscErrorCode ierr;
2459   Mat            *dummy;
2460 
2461   PetscFunctionBegin;
2462   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2463   *newmat = *dummy;
2464   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2469 {
2470   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2471   PetscErrorCode ierr;
2472 
2473   PetscFunctionBegin;
2474   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2475   A->factorerrortype = a->A->factorerrortype;
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2480 {
2481   PetscErrorCode ierr;
2482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2483 
2484   PetscFunctionBegin;
2485   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2486   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2487   if (x->assembled) {
2488     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2489   } else {
2490     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2491   }
2492   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2493   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2498 {
2499   PetscFunctionBegin;
2500   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2501   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 /*@
2506    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2507 
2508    Collective on Mat
2509 
2510    Input Parameters:
2511 +    A - the matrix
2512 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2513 
2514    Level: advanced
2515 
2516 @*/
2517 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2518 {
2519   PetscErrorCode       ierr;
2520 
2521   PetscFunctionBegin;
2522   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
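
/*
   Usage sketch (illustrative): the same switch is available from the options database via
   -mat_increase_overlap_scalable, read in MatSetFromOptions_MPIAIJ() below; it affects the
   overlap computation used, for example, by PCASM.

     MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);
*/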
2525 
2526 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2527 {
2528   PetscErrorCode       ierr;
2529   PetscBool            sc = PETSC_FALSE,flg;
2530 
2531   PetscFunctionBegin;
2532   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2533   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2534   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2535   if (flg) {
2536     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2537   }
2538   ierr = PetscOptionsTail();CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2543 {
2544   PetscErrorCode ierr;
2545   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2546   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2547 
2548   PetscFunctionBegin;
2549   if (!Y->preallocated) {
2550     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2551   } else if (!aij->nz) {
2552     PetscInt nonew = aij->nonew;
2553     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2554     aij->nonew = nonew;
2555   }
2556   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2561 {
2562   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2563   PetscErrorCode ierr;
2564 
2565   PetscFunctionBegin;
2566   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2567   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2568   if (d) {
2569     PetscInt rstart;
2570     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2571     *d += rstart;
2572 
2573   }
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2578 {
2579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2580   PetscErrorCode ierr;
2581 
2582   PetscFunctionBegin;
2583   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 /* -------------------------------------------------------------------*/
2588 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2589                                        MatGetRow_MPIAIJ,
2590                                        MatRestoreRow_MPIAIJ,
2591                                        MatMult_MPIAIJ,
2592                                 /* 4*/ MatMultAdd_MPIAIJ,
2593                                        MatMultTranspose_MPIAIJ,
2594                                        MatMultTransposeAdd_MPIAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*10*/ 0,
2599                                        0,
2600                                        0,
2601                                        MatSOR_MPIAIJ,
2602                                        MatTranspose_MPIAIJ,
2603                                 /*15*/ MatGetInfo_MPIAIJ,
2604                                        MatEqual_MPIAIJ,
2605                                        MatGetDiagonal_MPIAIJ,
2606                                        MatDiagonalScale_MPIAIJ,
2607                                        MatNorm_MPIAIJ,
2608                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2609                                        MatAssemblyEnd_MPIAIJ,
2610                                        MatSetOption_MPIAIJ,
2611                                        MatZeroEntries_MPIAIJ,
2612                                 /*24*/ MatZeroRows_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                 /*29*/ MatSetUp_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        MatGetDiagonalBlock_MPIAIJ,
2621                                        0,
2622                                 /*34*/ MatDuplicate_MPIAIJ,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                 /*39*/ MatAXPY_MPIAIJ,
2628                                        MatCreateSubMatrices_MPIAIJ,
2629                                        MatIncreaseOverlap_MPIAIJ,
2630                                        MatGetValues_MPIAIJ,
2631                                        MatCopy_MPIAIJ,
2632                                 /*44*/ MatGetRowMax_MPIAIJ,
2633                                        MatScale_MPIAIJ,
2634                                        MatShift_MPIAIJ,
2635                                        MatDiagonalSet_MPIAIJ,
2636                                        MatZeroRowsColumns_MPIAIJ,
2637                                 /*49*/ MatSetRandom_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2643                                        0,
2644                                        MatSetUnfactored_MPIAIJ,
2645                                        MatPermute_MPIAIJ,
2646                                        0,
2647                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2648                                        MatDestroy_MPIAIJ,
2649                                        MatView_MPIAIJ,
2650                                        0,
2651                                        0,
2652                                 /*64*/ 0,
2653                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2658                                        MatGetRowMinAbs_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*75*/ MatFDColoringApply_AIJ,
2664                                        MatSetFromOptions_MPIAIJ,
2665                                        0,
2666                                        0,
2667                                        MatFindZeroDiagonals_MPIAIJ,
2668                                 /*80*/ 0,
2669                                        0,
2670                                        0,
2671                                 /*83*/ MatLoad_MPIAIJ,
2672                                        MatIsSymmetric_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                 /*89*/ 0,
2678                                        0,
2679                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2680                                        0,
2681                                        0,
2682                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        MatBindToCPU_MPIAIJ,
2687                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2688                                        0,
2689                                        0,
2690                                        MatConjugate_MPIAIJ,
2691                                        0,
2692                                 /*104*/MatSetValuesRow_MPIAIJ,
2693                                        MatRealPart_MPIAIJ,
2694                                        MatImaginaryPart_MPIAIJ,
2695                                        0,
2696                                        0,
2697                                 /*109*/0,
2698                                        0,
2699                                        MatGetRowMin_MPIAIJ,
2700                                        0,
2701                                        MatMissingDiagonal_MPIAIJ,
2702                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2703                                        0,
2704                                        MatGetGhosts_MPIAIJ,
2705                                        0,
2706                                        0,
2707                                 /*119*/0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        MatGetMultiProcBlock_MPIAIJ,
2712                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2713                                        MatGetColumnNorms_MPIAIJ,
2714                                        MatInvertBlockDiagonal_MPIAIJ,
2715                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2716                                        MatCreateSubMatricesMPI_MPIAIJ,
2717                                 /*129*/0,
2718                                        0,
2719                                        0,
2720                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2721                                        0,
2722                                 /*134*/0,
2723                                        0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                 /*139*/MatSetBlockSizes_MPIAIJ,
2728                                        0,
2729                                        0,
2730                                        MatFDColoringSetUp_MPIXAIJ,
2731                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2732                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2733                                 /*145*/0,
2734                                        0,
2735                                        0
2736 };
2737 
2738 /* ----------------------------------------------------------------------------------------*/
2739 
2740 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2741 {
2742   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2743   PetscErrorCode ierr;
2744 
2745   PetscFunctionBegin;
2746   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2747   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2752 {
2753   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2754   PetscErrorCode ierr;
2755 
2756   PetscFunctionBegin;
2757   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2758   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2759   PetscFunctionReturn(0);
2760 }
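
/* A minimal usage sketch for the pair above (assumes mat is an assembled MATMPIAIJ matrix whose
   nonzero pattern does not change between the store and the retrieve):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);          (stash a copy of the current numerical values)
     ...                                                (modify the values of mat, same sparsity pattern)
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);       (restore the stashed values)
*/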
2761 
2762 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2763 {
2764   Mat_MPIAIJ     *b;
2765   PetscErrorCode ierr;
2766   PetscMPIInt    size;
2767 
2768   PetscFunctionBegin;
2769   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2770   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2771   b = (Mat_MPIAIJ*)B->data;
2772 
2773 #if defined(PETSC_USE_CTABLE)
2774   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2775 #else
2776   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2777 #endif
2778   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2779   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2780   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2781 
2782   /* Because B may have been resized we simply destroy it and create a new one each time */
2783   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2784   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2785   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2786   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2787   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2788   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2789   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2790 
2791   if (!B->preallocated) {
2792     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2793     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2794     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2795     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2796     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2797   }
2798 
2799   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2800   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2801   B->preallocated  = PETSC_TRUE;
2802   B->was_assembled = PETSC_FALSE;
2803   B->assembled     = PETSC_FALSE;
2804   PetscFunctionReturn(0);
2805 }
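
/* A minimal usage sketch of the public interface implemented above (assumes a 100 x 100 matrix
   with at most 5 nonzeros per row in the diagonal block and 2 in the off-diagonal block; exact
   per-row counts could instead be supplied through the d_nnz/o_nnz arrays):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/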
2806 
2807 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2808 {
2809   Mat_MPIAIJ     *b;
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2814   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2815   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2816   b = (Mat_MPIAIJ*)B->data;
2817 
2818 #if defined(PETSC_USE_CTABLE)
2819   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2820 #else
2821   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2822 #endif
2823   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2824   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2825   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2826 
2827   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2828   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2829   B->preallocated  = PETSC_TRUE;
2830   B->was_assembled = PETSC_FALSE;
2831   B->assembled = PETSC_FALSE;
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2836 {
2837   Mat            mat;
2838   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2839   PetscErrorCode ierr;
2840 
2841   PetscFunctionBegin;
2842   *newmat = 0;
2843   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2844   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2845   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2846   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2847   a       = (Mat_MPIAIJ*)mat->data;
2848 
2849   mat->factortype   = matin->factortype;
2850   mat->assembled    = matin->assembled;
2851   mat->insertmode   = NOT_SET_VALUES;
2852   mat->preallocated = matin->preallocated;
2853 
2854   a->size         = oldmat->size;
2855   a->rank         = oldmat->rank;
2856   a->donotstash   = oldmat->donotstash;
2857   a->roworiented  = oldmat->roworiented;
2858   a->rowindices   = NULL;
2859   a->rowvalues    = NULL;
2860   a->getrowactive = PETSC_FALSE;
2861 
2862   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2863   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2864 
2865   if (oldmat->colmap) {
2866 #if defined(PETSC_USE_CTABLE)
2867     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2868 #else
2869     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2870     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2871     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2872 #endif
2873   } else a->colmap = NULL;
2874   if (oldmat->garray) {
2875     PetscInt len;
2876     len  = oldmat->B->cmap->n;
2877     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2878     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2879     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2880   } else a->garray = NULL;
2881 
2882   /* MatDuplicate() may be called with a non-assembled matrix;
2883      in fact, MatDuplicate() only requires the matrix to be preallocated.
2884      This can happen, for example, inside DMCreateMatrix_Shell() */
2885   if (oldmat->lvec) {
2886     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2887     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2888   }
2889   if (oldmat->Mvctx) {
2890     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2891     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2892   }
2893   if (oldmat->Mvctx_mpi1) {
2894     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2895     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2896   }
2897 
2898   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2899   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2900   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2901   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2902   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2903   *newmat = mat;
2904   PetscFunctionReturn(0);
2905 }
2906 
2907 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2908 {
2909   PetscBool      isbinary, ishdf5;
2910   PetscErrorCode ierr;
2911 
2912   PetscFunctionBegin;
2913   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2914   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2915   /* force binary viewer to load .info file if it has not yet done so */
2916   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2917   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2918   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2919   if (isbinary) {
2920     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2921   } else if (ishdf5) {
2922 #if defined(PETSC_HAVE_HDF5)
2923     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2924 #else
2925     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2926 #endif
2927   } else {
2928     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2929   }
2930   PetscFunctionReturn(0);
2931 }
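
/* A minimal usage sketch for loading an MPIAIJ matrix (assumes "matrix.dat" was previously written
   with MatView() on a binary viewer; the filename is only illustrative):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/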
2932 
2933 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2934 {
2935   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2936   PetscInt       *rowidxs,*colidxs;
2937   PetscScalar    *matvals;
2938   PetscErrorCode ierr;
2939 
2940   PetscFunctionBegin;
2941   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2942 
2943   /* read in matrix header */
2944   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2945   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2946   M  = header[1]; N = header[2]; nz = header[3];
2947   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2948   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2949   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2950 
2951   /* set block sizes from the viewer's .info file */
2952   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2953   /* set global sizes if not set already */
2954   if (mat->rmap->N < 0) mat->rmap->N = M;
2955   if (mat->cmap->N < 0) mat->cmap->N = N;
2956   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2957   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2958 
2959   /* check if the matrix sizes are correct */
2960   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2961   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2962 
2963   /* read in row lengths and build row indices */
2964   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2965   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2966   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
2967   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2968   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2969   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
2970   /* read in column indices and matrix values */
2971   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2972   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2973   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2974   /* store matrix indices and values */
2975   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2976   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2977   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 /* Not scalable because of ISAllGather() unless getting all columns. */
2982 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2983 {
2984   PetscErrorCode ierr;
2985   IS             iscol_local;
2986   PetscBool      isstride;
2987   PetscMPIInt    lisstride=0,gisstride;
2988 
2989   PetscFunctionBegin;
2990   /* check if we are grabbing all columns */
2991   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2992 
2993   if (isstride) {
2994     PetscInt  start,len,mstart,mlen;
2995     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2996     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2997     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2998     if (mstart == start && mlen-mstart == len) lisstride = 1;
2999   }
3000 
3001   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3002   if (gisstride) {
3003     PetscInt N;
3004     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3005     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3006     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3007     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3008   } else {
3009     PetscInt cbs;
3010     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3011     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3012     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3013   }
3014 
3015   *isseq = iscol_local;
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 /*
3020  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3021  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3022 
3023  Input Parameters:
3024    mat - matrix
3025    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3026            i.e., mat->rstart <= isrow[i] < mat->rend
3027    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3028            i.e., mat->cstart <= iscol[i] < mat->cend
3029  Output Parameters:
3030    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3031    iscol_o - sequential column index set for retrieving mat->B
3032    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3033  */
3034 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3035 {
3036   PetscErrorCode ierr;
3037   Vec            x,cmap;
3038   const PetscInt *is_idx;
3039   PetscScalar    *xarray,*cmaparray;
3040   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3041   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3042   Mat            B=a->B;
3043   Vec            lvec=a->lvec,lcmap;
3044   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3045   MPI_Comm       comm;
3046   VecScatter     Mvctx=a->Mvctx;
3047 
3048   PetscFunctionBegin;
3049   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3050   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3051 
3052   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3053   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3054   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3055   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3056   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3057 
3058   /* Get start indices */
3059   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3060   isstart -= ncols;
3061   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3062 
3063   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3064   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3065   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3066   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3067   for (i=0; i<ncols; i++) {
3068     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3069     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3070     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3071   }
3072   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3073   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3074   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3075 
3076   /* Get iscol_d */
3077   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3078   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3079   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3080 
3081   /* Get isrow_d */
3082   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3083   rstart = mat->rmap->rstart;
3084   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3085   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3086   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3087   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3088 
3089   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3090   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3091   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3092 
3093   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3094   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3095   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3096 
3097   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3098 
3099   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3100   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3101 
3102   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3103   /* off-process column indices */
3104   count = 0;
3105   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3106   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3107 
3108   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3109   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3110   for (i=0; i<Bn; i++) {
3111     if (PetscRealPart(xarray[i]) > -1.0) {
3112       idx[count]     = i;                   /* local column index in off-diagonal part B */
3113       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3114       count++;
3115     }
3116   }
3117   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3118   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3119 
3120   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3121   /* cannot ensure iscol_o has same blocksize as iscol! */
3122 
3123   ierr = PetscFree(idx);CHKERRQ(ierr);
3124   *garray = cmap1;
3125 
3126   ierr = VecDestroy(&x);CHKERRQ(ierr);
3127   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3128   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3129   PetscFunctionReturn(0);
3130 }
3131 
3132 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3133 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3134 {
3135   PetscErrorCode ierr;
3136   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3137   Mat            M = NULL;
3138   MPI_Comm       comm;
3139   IS             iscol_d,isrow_d,iscol_o;
3140   Mat            Asub = NULL,Bsub = NULL;
3141   PetscInt       n;
3142 
3143   PetscFunctionBegin;
3144   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3145 
3146   if (call == MAT_REUSE_MATRIX) {
3147     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3148     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3149     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3150 
3151     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3152     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3153 
3154     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3155     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3156 
3157     /* Update diagonal and off-diagonal portions of submat */
3158     asub = (Mat_MPIAIJ*)(*submat)->data;
3159     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3160     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3161     if (n) {
3162       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3163     }
3164     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3166 
3167   } else { /* call == MAT_INITIAL_MATRIX */
3168     const PetscInt *garray;
3169     PetscInt        BsubN;
3170 
3171     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3172     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3173 
3174     /* Create local submatrices Asub and Bsub */
3175     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3176     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3177 
3178     /* Create submatrix M */
3179     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3180 
3181     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3182     asub = (Mat_MPIAIJ*)M->data;
3183 
3184     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3185     n = asub->B->cmap->N;
3186     if (BsubN > n) {
3187       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3188       const PetscInt *idx;
3189       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3190       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3191 
3192       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3193       j = 0;
3194       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3195       for (i=0; i<n; i++) {
3196         if (j >= BsubN) break;
3197         while (subgarray[i] > garray[j]) j++;
3198 
3199         if (subgarray[i] == garray[j]) {
3200           idx_new[i] = idx[j++];
3201         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3202       }
3203       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3204 
3205       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3206       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3207 
3208     } else if (BsubN < n) {
3209       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3210     }
3211 
3212     ierr = PetscFree(garray);CHKERRQ(ierr);
3213     *submat = M;
3214 
3215     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3216     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3217     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3218 
3219     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3220     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3221 
3222     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3223     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3224   }
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3229 {
3230   PetscErrorCode ierr;
3231   IS             iscol_local=NULL,isrow_d;
3232   PetscInt       csize;
3233   PetscInt       n,i,j,start,end;
3234   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3235   MPI_Comm       comm;
3236 
3237   PetscFunctionBegin;
3238   /* If isrow has same processor distribution as mat,
3239      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3240   if (call == MAT_REUSE_MATRIX) {
3241     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3242     if (isrow_d) {
3243       sameRowDist  = PETSC_TRUE;
3244       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3245     } else {
3246       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3247       if (iscol_local) {
3248         sameRowDist  = PETSC_TRUE;
3249         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3250       }
3251     }
3252   } else {
3253     /* Check if isrow has same processor distribution as mat */
3254     sameDist[0] = PETSC_FALSE;
3255     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3256     if (!n) {
3257       sameDist[0] = PETSC_TRUE;
3258     } else {
3259       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3260       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3261       if (i >= start && j < end) {
3262         sameDist[0] = PETSC_TRUE;
3263       }
3264     }
3265 
3266     /* Check if iscol has same processor distribution as mat */
3267     sameDist[1] = PETSC_FALSE;
3268     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3269     if (!n) {
3270       sameDist[1] = PETSC_TRUE;
3271     } else {
3272       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3273       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3274       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3275     }
3276 
3277     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3278     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3279     sameRowDist = tsameDist[0];
3280   }
3281 
3282   if (sameRowDist) {
3283     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3284       /* isrow and iscol have same processor distribution as mat */
3285       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3286       PetscFunctionReturn(0);
3287     } else { /* sameRowDist */
3288       /* isrow has same processor distribution as mat */
3289       if (call == MAT_INITIAL_MATRIX) {
3290         PetscBool sorted;
3291         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3292         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3293         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3294         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3295 
3296         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3297         if (sorted) {
3298           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3299           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3300           PetscFunctionReturn(0);
3301         }
3302       } else { /* call == MAT_REUSE_MATRIX */
3303         IS    iscol_sub;
3304         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3305         if (iscol_sub) {
3306           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3307           PetscFunctionReturn(0);
3308         }
3309       }
3310     }
3311   }
3312 
3313   /* General case: iscol -> iscol_local which has global size of iscol */
3314   if (call == MAT_REUSE_MATRIX) {
3315     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3316     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3317   } else {
3318     if (!iscol_local) {
3319       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3320     }
3321   }
3322 
3323   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3324   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3325 
3326   if (call == MAT_INITIAL_MATRIX) {
3327     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3328     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3329   }
3330   PetscFunctionReturn(0);
3331 }
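
/* A minimal usage sketch of the public interface dispatched above (assumes A is an assembled
   MATMPIAIJ matrix and isrow/iscol are parallel index sets selecting the rows and columns this
   process keeps in the submatrix):

     Mat sub;
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ...                                                (change numerical values of A, same pattern)
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/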
3332 
3333 /*@C
3334      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3335          and "off-diagonal" parts of the matrix in CSR format.
3336 
3337    Collective
3338 
3339    Input Parameters:
3340 +  comm - MPI communicator
3341 .  A - "diagonal" portion of matrix
3342 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3343 -  garray - global index of B columns
3344 
3345    Output Parameter:
3346 .   mat - the matrix, with input A as its local diagonal matrix
3347    Level: advanced
3348 
3349    Notes:
3350        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3351        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3352 
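   Example usage (a sketch; assumes the caller has already built the sequential AIJ matrices A and B and the garray[] array described above):
$     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&mat);CHKERRQ(ierr);
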
3353 .seealso: MatCreateMPIAIJWithSplitArrays()
3354 @*/
3355 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3356 {
3357   PetscErrorCode ierr;
3358   Mat_MPIAIJ     *maij;
3359   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3360   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3361   PetscScalar    *oa=b->a;
3362   Mat            Bnew;
3363   PetscInt       m,n,N;
3364 
3365   PetscFunctionBegin;
3366   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3367   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3368   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3369   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3370   /* the check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as that of A */
3371   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3372 
3373   /* Get global columns of mat */
3374   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3375 
3376   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3377   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3378   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3379   maij = (Mat_MPIAIJ*)(*mat)->data;
3380 
3381   (*mat)->preallocated = PETSC_TRUE;
3382 
3383   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3384   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3385 
3386   /* Set A as diagonal portion of *mat */
3387   maij->A = A;
3388 
3389   nz = oi[m];
3390   for (i=0; i<nz; i++) {
3391     col   = oj[i];
3392     oj[i] = garray[col];
3393   }
3394 
3395    /* Set Bnew as off-diagonal portion of *mat */
3396   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3397   bnew        = (Mat_SeqAIJ*)Bnew->data;
3398   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3399   maij->B     = Bnew;
3400 
3401   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3402 
3403   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3404   b->free_a       = PETSC_FALSE;
3405   b->free_ij      = PETSC_FALSE;
3406   ierr = MatDestroy(&B);CHKERRQ(ierr);
3407 
3408   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3409   bnew->free_a       = PETSC_TRUE;
3410   bnew->free_ij      = PETSC_TRUE;
3411 
3412   /* condense columns of maij->B */
3413   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3414   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3415   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3416   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3417   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3418   PetscFunctionReturn(0);
3419 }
3420 
3421 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3422 
3423 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3424 {
3425   PetscErrorCode ierr;
3426   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3427   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3428   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3429   Mat            M,Msub,B=a->B;
3430   MatScalar      *aa;
3431   Mat_SeqAIJ     *aij;
3432   PetscInt       *garray = a->garray,*colsub,Ncols;
3433   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3434   IS             iscol_sub,iscmap;
3435   const PetscInt *is_idx,*cmap;
3436   PetscBool      allcolumns=PETSC_FALSE;
3437   MPI_Comm       comm;
3438 
3439   PetscFunctionBegin;
3440   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3441 
3442   if (call == MAT_REUSE_MATRIX) {
3443     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3444     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3445     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3446 
3447     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3448     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3449 
3450     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3451     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3452 
3453     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3454 
3455   } else { /* call == MAT_INITIAL_MATRIX */
3456     PetscBool flg;
3457 
3458     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3459     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3460 
3461     /* (1) iscol -> nonscalable iscol_local */
3462     /* Check for special case: each processor gets entire matrix columns */
3463     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3464     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3465     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3466     if (allcolumns) {
3467       iscol_sub = iscol_local;
3468       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3469       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3470 
3471     } else {
3472       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3473       PetscInt *idx,*cmap1,k;
3474       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3475       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3476       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3477       count = 0;
3478       k     = 0;
3479       for (i=0; i<Ncols; i++) {
3480         j = is_idx[i];
3481         if (j >= cstart && j < cend) {
3482           /* diagonal part of mat */
3483           idx[count]     = j;
3484           cmap1[count++] = i; /* column index in submat */
3485         } else if (Bn) {
3486           /* off-diagonal part of mat */
3487           if (j == garray[k]) {
3488             idx[count]     = j;
3489             cmap1[count++] = i;  /* column index in submat */
3490           } else if (j > garray[k]) {
3491             while (j > garray[k] && k < Bn-1) k++;
3492             if (j == garray[k]) {
3493               idx[count]     = j;
3494               cmap1[count++] = i; /* column index in submat */
3495             }
3496           }
3497         }
3498       }
3499       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3500 
3501       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3502       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3503       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3504 
3505       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3506     }
3507 
3508     /* (3) Create sequential Msub */
3509     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3510   }
3511 
3512   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3513   aij  = (Mat_SeqAIJ*)(Msub)->data;
3514   ii   = aij->i;
3515   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3516 
3517   /*
3518       m - number of local rows
3519       Ncols - number of columns (same on all processors)
3520       rstart - first row in new global matrix generated
3521   */
3522   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3523 
3524   if (call == MAT_INITIAL_MATRIX) {
3525     /* (4) Create parallel newmat */
3526     PetscMPIInt    rank,size;
3527     PetscInt       csize;
3528 
3529     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3530     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3531 
3532     /*
3533         Determine the number of non-zeros in the diagonal and off-diagonal
3534         portions of the matrix in order to do correct preallocation
3535     */
3536 
3537     /* first get start and end of "diagonal" columns */
3538     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3539     if (csize == PETSC_DECIDE) {
3540       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3541       if (mglobal == Ncols) { /* square matrix */
3542         nlocal = m;
3543       } else {
3544         nlocal = Ncols/size + ((Ncols % size) > rank);
3545       }
3546     } else {
3547       nlocal = csize;
3548     }
3549     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3550     rstart = rend - nlocal;
3551     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3552 
3553     /* next, compute all the lengths */
3554     jj    = aij->j;
3555     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3556     olens = dlens + m;
3557     for (i=0; i<m; i++) {
3558       jend = ii[i+1] - ii[i];
3559       olen = 0;
3560       dlen = 0;
3561       for (j=0; j<jend; j++) {
3562         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3563         else dlen++;
3564         jj++;
3565       }
3566       olens[i] = olen;
3567       dlens[i] = dlen;
3568     }
3569 
3570     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3571     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3572 
3573     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3574     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3575     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3576     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3577     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3578     ierr = PetscFree(dlens);CHKERRQ(ierr);
3579 
3580   } else { /* call == MAT_REUSE_MATRIX */
3581     M    = *newmat;
3582     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3583     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3584     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3585     /*
3586          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3587        rather than the slower MatSetValues().
3588     */
3589     M->was_assembled = PETSC_TRUE;
3590     M->assembled     = PETSC_FALSE;
3591   }
3592 
3593   /* (5) Set values of Msub to *newmat */
3594   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3595   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3596 
3597   jj   = aij->j;
3598   aa   = aij->a;
3599   for (i=0; i<m; i++) {
3600     row = rstart + i;
3601     nz  = ii[i+1] - ii[i];
3602     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3603     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3604     jj += nz; aa += nz;
3605   }
3606   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3607 
3608   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3609   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610 
3611   ierr = PetscFree(colsub);CHKERRQ(ierr);
3612 
3613   /* save Msub, iscol_sub and iscmap used in processor for next request */
3614   if (call ==  MAT_INITIAL_MATRIX) {
3615     *newmat = M;
3616     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3617     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3618 
3619     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3620     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3621 
3622     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3623     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3624 
3625     if (iscol_local) {
3626       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3627       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3628     }
3629   }
3630   PetscFunctionReturn(0);
3631 }
3632 
3633 /*
3634     Not great since it makes two copies of the submatrix: first a SeqAIJ
3635   locally, and then the end result obtained by concatenating the local matrices.
3636   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3637 
3638   Note: This requires a sequential iscol with all indices.
3639 */
3640 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3641 {
3642   PetscErrorCode ierr;
3643   PetscMPIInt    rank,size;
3644   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3645   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3646   Mat            M,Mreuse;
3647   MatScalar      *aa,*vwork;
3648   MPI_Comm       comm;
3649   Mat_SeqAIJ     *aij;
3650   PetscBool      colflag,allcolumns=PETSC_FALSE;
3651 
3652   PetscFunctionBegin;
3653   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3654   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3655   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3656 
3657   /* Check for special case: each processor gets entire matrix columns */
3658   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3659   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3660   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3661   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3662 
3663   if (call ==  MAT_REUSE_MATRIX) {
3664     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3665     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3666     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3667   } else {
3668     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3669   }
3670 
3671   /*
3672       m - number of local rows
3673       n - number of columns (same on all processors)
3674       rstart - first row in new global matrix generated
3675   */
3676   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3677   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3678   if (call == MAT_INITIAL_MATRIX) {
3679     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3680     ii  = aij->i;
3681     jj  = aij->j;
3682 
3683     /*
3684         Determine the number of non-zeros in the diagonal and off-diagonal
3685         portions of the matrix in order to do correct preallocation
3686     */
3687 
3688     /* first get start and end of "diagonal" columns */
3689     if (csize == PETSC_DECIDE) {
3690       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3691       if (mglobal == n) { /* square matrix */
3692         nlocal = m;
3693       } else {
3694         nlocal = n/size + ((n % size) > rank);
3695       }
3696     } else {
3697       nlocal = csize;
3698     }
3699     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3700     rstart = rend - nlocal;
3701     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3702 
3703     /* next, compute all the lengths */
3704     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3705     olens = dlens + m;
3706     for (i=0; i<m; i++) {
3707       jend = ii[i+1] - ii[i];
3708       olen = 0;
3709       dlen = 0;
3710       for (j=0; j<jend; j++) {
3711         if (*jj < rstart || *jj >= rend) olen++;
3712         else dlen++;
3713         jj++;
3714       }
3715       olens[i] = olen;
3716       dlens[i] = dlen;
3717     }
3718     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3719     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3720     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3721     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3722     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3723     ierr = PetscFree(dlens);CHKERRQ(ierr);
3724   } else {
3725     PetscInt ml,nl;
3726 
3727     M    = *newmat;
3728     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3729     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3730     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3731     /*
3732          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3733        rather than the slower MatSetValues().
3734     */
3735     M->was_assembled = PETSC_TRUE;
3736     M->assembled     = PETSC_FALSE;
3737   }
3738   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3739   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3740   ii   = aij->i;
3741   jj   = aij->j;
3742   aa   = aij->a;
3743   for (i=0; i<m; i++) {
3744     row   = rstart + i;
3745     nz    = ii[i+1] - ii[i];
3746     cwork = jj;     jj += nz;
3747     vwork = aa;     aa += nz;
3748     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3749   }
3750 
3751   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3753   *newmat = M;
3754 
3755   /* save submatrix used in processor for next request */
3756   if (call ==  MAT_INITIAL_MATRIX) {
3757     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3758     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3759   }
3760   PetscFunctionReturn(0);
3761 }
3762 
3763 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3764 {
3765   PetscInt       m,cstart, cend,j,nnz,i,d;
3766   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3767   const PetscInt *JJ;
3768   PetscErrorCode ierr;
3769   PetscBool      nooffprocentries;
3770 
3771   PetscFunctionBegin;
3772   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3773 
3774   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3775   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3776   m      = B->rmap->n;
3777   cstart = B->cmap->rstart;
3778   cend   = B->cmap->rend;
3779   rstart = B->rmap->rstart;
3780 
3781   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3782 
3783   if (PetscDefined(USE_DEBUG)) {
3784     for (i=0; i<m; i++) {
3785       nnz = Ii[i+1]- Ii[i];
3786       JJ  = J + Ii[i];
3787       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3788       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3789       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3790     }
3791   }
3792 
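  /* For each local row, count the entries whose global column falls inside the diagonal
     block [cstart,cend); those become d_nnz[i], the remaining entries become o_nnz[i]. */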
3793   for (i=0; i<m; i++) {
3794     nnz     = Ii[i+1]- Ii[i];
3795     JJ      = J + Ii[i];
3796     nnz_max = PetscMax(nnz_max,nnz);
3797     d       = 0;
3798     for (j=0; j<nnz; j++) {
3799       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3800     }
3801     d_nnz[i] = d;
3802     o_nnz[i] = nnz - d;
3803   }
3804   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3805   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3806 
3807   for (i=0; i<m; i++) {
3808     ii   = i + rstart;
3809     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3810   }
3811   nooffprocentries    = B->nooffprocentries;
3812   B->nooffprocentries = PETSC_TRUE;
3813   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3814   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3815   B->nooffprocentries = nooffprocentries;
3816 
3817   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3818   PetscFunctionReturn(0);
3819 }
3820 
3821 /*@
3822    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3823    (the default parallel PETSc format).
3824 
3825    Collective
3826 
3827    Input Parameters:
3828 +  B - the matrix
3829 .  i - the indices into j for the start of each local row (starts with zero)
3830 .  j - the column indices for each local row (starts with zero)
3831 -  v - optional values in the matrix
3832 
3833    Level: developer
3834 
3835    Notes:
3836        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3837      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3838      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3839 
3840        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3841 
3842        The format used for the sparse matrix input is equivalent to a
3843     row-major ordering, i.e., for the following matrix the expected input data is
3844     as shown
3845 
3846 $        1 0 0
3847 $        2 0 3     P0
3848 $       -------
3849 $        4 5 6     P1
3850 $
3851 $     Process0 [P0]: rows_owned=[0,1]
3852 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3853 $        j =  {0,0,2}  [size = 3]
3854 $        v =  {1,2,3}  [size = 3]
3855 $
3856 $     Process1 [P1]: rows_owned=[2]
3857 $        i =  {0,3}    [size = nrow+1  = 1+1]
3858 $        j =  {0,1,2}  [size = 3]
3859 $        v =  {4,5,6}  [size = 3]
3860 
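       As a minimal calling sketch for process 0 of the example above (error checking
     omitted; the hard-coded sizes are taken from this example and are not requirements):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};   /* P0 row offsets    */
     PetscInt    j[] = {0,0,2};   /* P0 column indices */
     PetscScalar v[] = {1,2,3};   /* P0 values         */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,2,PETSC_DECIDE,3,3);    /* P0 owns rows 0 and 1 of the 3x3 matrix */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve
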
3861 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3862           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3863 @*/
3864 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3865 {
3866   PetscErrorCode ierr;
3867 
3868   PetscFunctionBegin;
3869   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3870   PetscFunctionReturn(0);
3871 }
3872 
3873 /*@C
3874    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3875    (the default parallel PETSc format).  For good matrix assembly performance
3876    the user should preallocate the matrix storage by setting the parameters
3877    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3878    performance can be increased by more than a factor of 50.
3879 
3880    Collective
3881 
3882    Input Parameters:
3883 +  B - the matrix
3884 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3885            (same value is used for all local rows)
3886 .  d_nnz - array containing the number of nonzeros in the various rows of the
3887            DIAGONAL portion of the local submatrix (possibly different for each row)
3888            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3889            The size of this array is equal to the number of local rows, i.e 'm'.
3890            For matrices that will be factored, you must leave room for (and set)
3891            the diagonal entry even if it is zero.
3892 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3893            submatrix (same value is used for all local rows).
3894 -  o_nnz - array containing the number of nonzeros in the various rows of the
3895            OFF-DIAGONAL portion of the local submatrix (possibly different for
3896            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3897            structure. The size of this array is equal to the number
3898            of local rows, i.e 'm'.
3899 
3900    If the *_nnz parameter is given then the *_nz parameter is ignored
3901 
3902    The AIJ format (also called the Yale sparse matrix format or
3903    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3904    storage.  The stored row and column indices begin with zero.
3905    See Users-Manual: ch_mat for details.
3906 
3907    The parallel matrix is partitioned such that the first m0 rows belong to
3908    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3909    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3910 
3911    The DIAGONAL portion of the local submatrix of a processor can be defined
3912    as the submatrix obtained by extracting the part corresponding to
3913    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3914    first row that belongs to the processor, r2 is the last row belonging to
3915    this processor, and c1-c2 is the range of indices of the local part of a
3916    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3917    common case of a square matrix, the row and column ranges are the same and
3918    the DIAGONAL part is also square. The remaining portion of the local
3919    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3920 
3921    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3922 
3923    You can call MatGetInfo() to get information on how effective the preallocation was;
3924    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3925    You can also run with the option -info and look for messages with the string
3926    malloc in them to see if additional memory allocation was needed.
3927 
3928    Example usage:
3929 
3930    Consider the following 8x8 matrix with 34 non-zero values that is
3931    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3932    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3933    as follows:
3934 
3935 .vb
3936             1  2  0  |  0  3  0  |  0  4
3937     Proc0   0  5  6  |  7  0  0  |  8  0
3938             9  0 10  | 11  0  0  | 12  0
3939     -------------------------------------
3940            13  0 14  | 15 16 17  |  0  0
3941     Proc1   0 18  0  | 19 20 21  |  0  0
3942             0  0  0  | 22 23  0  | 24  0
3943     -------------------------------------
3944     Proc2  25 26 27  |  0  0 28  | 29  0
3945            30  0  0  | 31 32 33  |  0 34
3946 .ve
3947 
3948    This can be represented as a collection of submatrices as:
3949 
3950 .vb
3951       A B C
3952       D E F
3953       G H I
3954 .ve
3955 
3956    Where the submatrices A,B,C are owned by proc0, D,E,F are
3957    owned by proc1, G,H,I are owned by proc2.
3958 
3959    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3960    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3961    The 'M','N' parameters are 8,8, and have the same values on all procs.
3962 
3963    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3964    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3965    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3966    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3967    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3968    matrix, and [DF] as another SeqAIJ matrix.
3969 
3970    When d_nz, o_nz parameters are specified, d_nz storage elements are
3971    allocated for every row of the local diagonal submatrix, and o_nz
3972    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3973    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3974    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3975    In this case, the values of d_nz,o_nz are:
3976 .vb
3977      proc0 : dnz = 2, o_nz = 2
3978      proc1 : dnz = 3, o_nz = 2
3979      proc2 : dnz = 1, o_nz = 4
3980 .ve
3981    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3982    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3983    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3984    34 values.
3985 
3986    When d_nnz, o_nnz parameters are specified, the storage is specified
3987    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3988    In the above case the values for d_nnz,o_nnz are:
3989 .vb
3990      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3991      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3992      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3993 .ve
3994    Here the space allocated is sum of all the above values i.e 34, and
3995    hence pre-allocation is perfect.
3996 
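   As a sketch, proc0 in the example above could pass these per-row counts as follows
   (the arrays are illustrative; every process supplies its own local counts, and the
   scalar d_nz/o_nz arguments are ignored when the arrays are given):

.vb
     PetscInt d_nnz[] = {2,2,2};   /* proc0: nonzeros per row in the DIAGONAL block     */
     PetscInt o_nnz[] = {2,2,2};   /* proc0: nonzeros per row in the OFF-DIAGONAL block */

     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
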
3997    Level: intermediate
3998 
3999 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4000           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4001 @*/
4002 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4003 {
4004   PetscErrorCode ierr;
4005 
4006   PetscFunctionBegin;
4007   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4008   PetscValidType(B,1);
4009   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
4012 
4013 /*@
4014      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4015          rows in standard CSR format.
4016 
4017    Collective
4018 
4019    Input Parameters:
4020 +  comm - MPI communicator
4021 .  m - number of local rows (Cannot be PETSC_DECIDE)
4022 .  n - This value should be the same as the local size used in creating the
4023        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4024        calculated if N is given) For square matrices n is almost always m.
4025 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4026 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4027 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4028 .   j - column indices
4029 -   a - matrix values
4030 
4031    Output Parameter:
4032 .   mat - the matrix
4033 
4034    Level: intermediate
4035 
4036    Notes:
4037        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4038      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4039      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4040 
4041        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4042 
4043        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4044 
4045        The format used for the sparse matrix input is equivalent to a
4046     row-major ordering, i.e., for the following matrix the expected input data is
4047     as shown
4048 
4049 $        1 0 0
4050 $        2 0 3     P0
4051 $       -------
4052 $        4 5 6     P1
4053 $
4054 $     Process0 [P0]: rows_owned=[0,1]
4055 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4056 $        j =  {0,0,2}  [size = 3]
4057 $        v =  {1,2,3}  [size = 3]
4058 $
4059 $     Process1 [P1]: rows_owned=[2]
4060 $        i =  {0,3}    [size = nrow+1  = 1+1]
4061 $        j =  {0,1,2}  [size = 3]
4062 $        v =  {4,5,6}  [size = 3]
4063 
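       A minimal per-process sketch (m, n, i, j, a, and anew are illustrative local
     variables holding this process's rows in the CSR layout shown above):

.vb
     Mat A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
     /* later: refresh only the numerical values; the i and j arrays must be unchanged */
     MatUpdateMPIAIJWithArrays(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,anew);
.ve
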
4064 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4065           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4066 @*/
4067 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4068 {
4069   PetscErrorCode ierr;
4070 
4071   PetscFunctionBegin;
4072   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4073   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4074   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4075   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4076   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4077   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4078   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4079   PetscFunctionReturn(0);
4080 }
4081 
4082 /*@
4083      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4084          rows in standard CSR format. Only the numerical values are updated; the row and column index arrays must be identical to those used to create the matrix.
4085 
4086    Collective
4087 
4088    Input Parameters:
4089 +  mat - the matrix
4090 .  m - number of local rows (Cannot be PETSC_DECIDE)
4091 .  n - This value should be the same as the local size used in creating the
4092        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4093        calculated if N is given) For square matrices n is almost always m.
4094 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4095 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4096 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4097 .  J - column indices
4098 -  v - matrix values
4099 
4100    Level: intermediate
4101 
4102 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4103           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4104 @*/
4105 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4106 {
4107   PetscErrorCode ierr;
4108   PetscInt       cstart,nnz,i,j;
4109   PetscInt       *ld;
4110   PetscBool      nooffprocentries;
4111   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4112   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4113   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4114   const PetscInt *Adi = Ad->i;
4115   PetscInt       ldi,Iii,md;
4116 
4117   PetscFunctionBegin;
4118   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4119   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4120   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4121   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4122 
4123   cstart = mat->cmap->rstart;
4124   if (!Aij->ld) {
4125     /* count number of entries below block diagonal */
4126     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4127     Aij->ld = ld;
4128     for (i=0; i<m; i++) {
4129       nnz  = Ii[i+1]- Ii[i];
4130       j     = 0;
4131       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before reading J[j] to avoid an out-of-range access */
4132       J    += nnz;
4133       ld[i] = j;
4134     }
4135   } else {
4136     ld = Aij->ld;
4137   }
4138 
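  /* Each CSR row of v is split into three contiguous pieces: ld[i] entries to the left of
     the diagonal block (copied into the off-diagonal matrix B), md entries inside the
     diagonal block (copied into the diagonal matrix A), and the remaining entries to the
     right of the block (appended to the off-diagonal matrix B). */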
4139   for (i=0; i<m; i++) {
4140     nnz  = Ii[i+1]- Ii[i];
4141     Iii  = Ii[i];
4142     ldi  = ld[i];
4143     md   = Adi[i+1]-Adi[i];
4144     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4145     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4146     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4147     ad  += md;
4148     ao  += nnz - md;
4149   }
4150   nooffprocentries      = mat->nooffprocentries;
4151   mat->nooffprocentries = PETSC_TRUE;
4152   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4153   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4154   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4155   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4156   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4157   mat->nooffprocentries = nooffprocentries;
4158   PetscFunctionReturn(0);
4159 }
4160 
4161 /*@C
4162    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4163    (the default parallel PETSc format).  For good matrix assembly performance
4164    the user should preallocate the matrix storage by setting the parameters
4165    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4166    performance can be increased by more than a factor of 50.
4167 
4168    Collective
4169 
4170    Input Parameters:
4171 +  comm - MPI communicator
4172 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4173            This value should be the same as the local size used in creating the
4174            y vector for the matrix-vector product y = Ax.
4175 .  n - This value should be the same as the local size used in creating the
4176        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4177        calculated if N is given) For square matrices n is almost always m.
4178 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4179 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4180 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4181            (same value is used for all local rows)
4182 .  d_nnz - array containing the number of nonzeros in the various rows of the
4183            DIAGONAL portion of the local submatrix (possibly different for each row)
4184            or NULL, if d_nz is used to specify the nonzero structure.
4185            The size of this array is equal to the number of local rows, i.e 'm'.
4186 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4187            submatrix (same value is used for all local rows).
4188 -  o_nnz - array containing the number of nonzeros in the various rows of the
4189            OFF-DIAGONAL portion of the local submatrix (possibly different for
4190            each row) or NULL, if o_nz is used to specify the nonzero
4191            structure. The size of this array is equal to the number
4192            of local rows, i.e 'm'.
4193 
4194    Output Parameter:
4195 .  A - the matrix
4196 
4197    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4198    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4199    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4200 
4201    Notes:
4202    If the *_nnz parameter is given then the *_nz parameter is ignored
4203 
4204    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4205    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4206    storage requirements for this matrix.
4207 
4208    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4209    processor then it must be used on all processors that share the object for
4210    that argument.
4211 
4212    The user MUST specify either the local or global matrix dimensions
4213    (possibly both).
4214 
4215    The parallel matrix is partitioned across processors such that the
4216    first m0 rows belong to process 0, the next m1 rows belong to
4217    process 1, the next m2 rows belong to process 2 etc.. where
4218    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4219    values corresponding to an [m x N] submatrix.
4220 
4221    The columns are logically partitioned with the n0 columns belonging
4222    to 0th partition, the next n1 columns belonging to the next
4223    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4224 
4225    The DIAGONAL portion of the local submatrix on any given processor
4226    is the submatrix formed by the rows and columns m,n owned by that
4227    processor, i.e., the diagonal matrix on process 0 is [m0 x n0],
4228    the diagonal matrix on process 1 is [m1 x n1], etc. The remaining
4229    portion of the local submatrix [m x (N-n)] constitutes the
4230    OFF-DIAGONAL portion. The example below better illustrates this concept.
4232 
4233    For a square global matrix we define each processor's diagonal portion
4234    to be its local rows and the corresponding columns (a square submatrix);
4235    each processor's off-diagonal portion encompasses the remainder of the
4236    local matrix (a rectangular submatrix).
4237 
4238    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4239 
4240    When calling this routine with a single process communicator, a matrix of
4241    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4242    type of communicator, use the construction mechanism
4243 .vb
4244      MatCreate(...,&A);
4245      MatSetType(A,MATMPIAIJ);
4246      MatSetSizes(A, m,n,M,N);
4247      MatMPIAIJSetPreallocation(A,...);
4248 .ve
4249 
4252    By default, this format uses inodes (identical nodes) when possible.
4253    We search for consecutive rows with the same nonzero structure, thereby
4254    reusing matrix information to achieve increased efficiency.
4255 
4256    Options Database Keys:
4257 +  -mat_no_inode  - Do not use inodes
4258 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4259 
4260 
4261 
4262    Example usage:
4263 
4264    Consider the following 8x8 matrix with 34 non-zero values that is
4265    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4266    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4267    as follows
4268 
4269 .vb
4270             1  2  0  |  0  3  0  |  0  4
4271     Proc0   0  5  6  |  7  0  0  |  8  0
4272             9  0 10  | 11  0  0  | 12  0
4273     -------------------------------------
4274            13  0 14  | 15 16 17  |  0  0
4275     Proc1   0 18  0  | 19 20 21  |  0  0
4276             0  0  0  | 22 23  0  | 24  0
4277     -------------------------------------
4278     Proc2  25 26 27  |  0  0 28  | 29  0
4279            30  0  0  | 31 32 33  |  0 34
4280 .ve
4281 
4282    This can be represented as a collection of submatrices as
4283 
4284 .vb
4285       A B C
4286       D E F
4287       G H I
4288 .ve
4289 
4290    Where the submatrices A,B,C are owned by proc0, D,E,F are
4291    owned by proc1, G,H,I are owned by proc2.
4292 
4293    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4294    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4295    The 'M','N' parameters are 8,8, and have the same values on all procs.
4296 
4297    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4298    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4299    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4300    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4301    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4302    matrix, and [DF] as another SeqAIJ matrix.
4303 
4304    When d_nz, o_nz parameters are specified, d_nz storage elements are
4305    allocated for every row of the local diagonal submatrix, and o_nz
4306    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4307    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4308    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4309    In this case, the values of d_nz,o_nz are
4310 .vb
4311      proc0 : dnz = 2, o_nz = 2
4312      proc1 : dnz = 3, o_nz = 2
4313      proc2 : dnz = 1, o_nz = 4
4314 .ve
4315    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4316    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4317    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4318    34 values.
4319 
4320    When d_nnz, o_nnz parameters are specified, the storage is specified
4321    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4322    In the above case the values for d_nnz,o_nnz are
4323 .vb
4324      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4325      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4326      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4327 .ve
4328    Here the space allocated is sum of all the above values i.e 34, and
4329    hence pre-allocation is perfect.
4330 
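   As a sketch, the collective call for the example above using the scalar preallocation
   parameters (each process passes its own local sizes and counts; proc0 is shown here):

.vb
     Mat A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,2,NULL,2,NULL,&A);
.ve
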
4331    Level: intermediate
4332 
4333 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4334           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4335 @*/
4336 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4337 {
4338   PetscErrorCode ierr;
4339   PetscMPIInt    size;
4340 
4341   PetscFunctionBegin;
4342   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4343   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4344   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4345   if (size > 1) {
4346     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4347     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4348   } else {
4349     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4350     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4351   }
4352   PetscFunctionReturn(0);
4353 }
4354 
4355 /*@C
4356   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4357 
4358   Not collective
4359 
4360   Input Parameter:
4361 . A - The MPIAIJ matrix
4362 
4363   Output Parameters:
4364 + Ad - The local diagonal block as a SeqAIJ matrix
4365 . Ao - The local off-diagonal block as a SeqAIJ matrix
4366 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4367 
4368   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4369   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4370   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4371   local column numbers to global column numbers in the original matrix.
4372 
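  A brief sketch of mapping an off-diagonal column back to a global column (it is assumed
  that local row 0 of Ao has at least one entry; error checking omitted):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap,*cols;
     PetscInt       ncols,globalcol;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     MatGetRow(Ao,0,&ncols,&cols,NULL);
     globalcol = colmap[cols[0]];   /* global column of the first entry in local row 0 of Ao */
     MatRestoreRow(Ao,0,&ncols,&cols,NULL);
.ve
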
4373   Level: intermediate
4374 
4375 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4376 @*/
4377 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4378 {
4379   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4380   PetscBool      flg;
4381   PetscErrorCode ierr;
4382 
4383   PetscFunctionBegin;
4384   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4385   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4386   if (Ad)     *Ad     = a->A;
4387   if (Ao)     *Ao     = a->B;
4388   if (colmap) *colmap = a->garray;
4389   PetscFunctionReturn(0);
4390 }
4391 
4392 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4393 {
4394   PetscErrorCode ierr;
4395   PetscInt       m,N,i,rstart,nnz,Ii;
4396   PetscInt       *indx;
4397   PetscScalar    *values;
4398 
4399   PetscFunctionBegin;
4400   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4401   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4402     PetscInt       *dnz,*onz,sum,bs,cbs;
4403 
4404     if (n == PETSC_DECIDE) {
4405       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4406     }
4407     /* Check sum(n) = N */
4408     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4409     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4410 
4411     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4412     rstart -= m;
4413 
4414     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4415     for (i=0; i<m; i++) {
4416       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4417       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4418       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4419     }
4420 
4421     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4422     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4423     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4424     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4425     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4426     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4427     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4428     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4429   }
4430 
4431   /* numeric phase */
4432   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4433   for (i=0; i<m; i++) {
4434     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4435     Ii   = i + rstart;
4436     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4437     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4438   }
4439   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4440   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4441   PetscFunctionReturn(0);
4442 }
4443 
4444 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4445 {
4446   PetscErrorCode    ierr;
4447   PetscMPIInt       rank;
4448   PetscInt          m,N,i,rstart,nnz;
4449   size_t            len;
4450   const PetscInt    *indx;
4451   PetscViewer       out;
4452   char              *name;
4453   Mat               B;
4454   const PetscScalar *values;
4455 
4456   PetscFunctionBegin;
4457   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4458   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4459   /* Should this be the type of the diagonal block of A? */
4460   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4461   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4462   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4463   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4464   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4465   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4466   for (i=0; i<m; i++) {
4467     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4468     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4469     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4470   }
4471   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4472   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4473 
4474   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4475   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4476   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr);  /* leave room for ranks with more than three digits */
4477   ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
4478   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4479   ierr = PetscFree(name);CHKERRQ(ierr);
4480   ierr = MatView(B,out);CHKERRQ(ierr);
4481   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4482   ierr = MatDestroy(&B);CHKERRQ(ierr);
4483   PetscFunctionReturn(0);
4484 }
4485 
4486 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4487 {
4488   PetscErrorCode      ierr;
4489   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4490 
4491   PetscFunctionBegin;
4492   if (!merge) PetscFunctionReturn(0);
4493   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4494   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4495   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4496   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4497   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4498   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4499   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4500   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4501   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4502   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4503   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4504   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4505   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4506   ierr = PetscFree(merge);CHKERRQ(ierr);
4507   PetscFunctionReturn(0);
4508 }
4509 
4510 #include <../src/mat/utils/freespace.h>
4511 #include <petscbt.h>
4512 
4513 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4514 {
4515   PetscErrorCode      ierr;
4516   MPI_Comm            comm;
4517   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4518   PetscMPIInt         size,rank,taga,*len_s;
4519   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4520   PetscInt            proc,m;
4521   PetscInt            **buf_ri,**buf_rj;
4522   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4523   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4524   MPI_Request         *s_waits,*r_waits;
4525   MPI_Status          *status;
4526   MatScalar           *aa=a->a;
4527   MatScalar           **abuf_r,*ba_i;
4528   Mat_Merge_SeqsToMPI *merge;
4529   PetscContainer      container;
4530 
4531   PetscFunctionBegin;
4532   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4533   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4534 
4535   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4536   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4537 
4538   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4539   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4540   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4541 
4542   bi     = merge->bi;
4543   bj     = merge->bj;
4544   buf_ri = merge->buf_ri;
4545   buf_rj = merge->buf_rj;
4546 
4547   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4548   owners = merge->rowmap->range;
4549   len_s  = merge->len_s;
4550 
4551   /* send and recv matrix values */
4552   /*-----------------------------*/
4553   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4554   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4555 
4556   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4557   for (proc=0,k=0; proc<size; proc++) {
4558     if (!len_s[proc]) continue;
4559     i    = owners[proc];
4560     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4561     k++;
4562   }
4563 
4564   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4565   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4566   ierr = PetscFree(status);CHKERRQ(ierr);
4567 
4568   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4569   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4570 
4571   /* insert mat values of mpimat */
4572   /*----------------------------*/
4573   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4574   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4575 
4576   for (k=0; k<merge->nrecv; k++) {
4577     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4578     nrows       = *(buf_ri_k[k]);
4579     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4580     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure  */
4581   }
4582 
4583   /* set values of ba */
4584   m = merge->rowmap->n;
4585   for (i=0; i<m; i++) {
4586     arow = owners[rank] + i;
4587     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4588     bnzi = bi[i+1] - bi[i];
4589     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4590 
4591     /* add local non-zero vals of this proc's seqmat into ba */
4592     anzi   = ai[arow+1] - ai[arow];
4593     aj     = a->j + ai[arow];
4594     aa     = a->a + ai[arow];
4595     nextaj = 0;
4596     for (j=0; nextaj<anzi; j++) {
4597       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4598         ba_i[j] += aa[nextaj++];
4599       }
4600     }
4601 
4602     /* add received vals into ba */
4603     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4604       /* i-th row */
4605       if (i == *nextrow[k]) {
4606         anzi   = *(nextai[k]+1) - *nextai[k];
4607         aj     = buf_rj[k] + *(nextai[k]);
4608         aa     = abuf_r[k] + *(nextai[k]);
4609         nextaj = 0;
4610         for (j=0; nextaj<anzi; j++) {
4611           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4612             ba_i[j] += aa[nextaj++];
4613           }
4614         }
4615         nextrow[k]++; nextai[k]++;
4616       }
4617     }
4618     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4619   }
4620   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4621   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4622 
4623   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4624   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4625   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4626   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4627   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4628   PetscFunctionReturn(0);
4629 }
4630 
4631 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4632 {
4633   PetscErrorCode      ierr;
4634   Mat                 B_mpi;
4635   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4636   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4637   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4638   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4639   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4640   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4641   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4642   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4643   MPI_Status          *status;
4644   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4645   PetscBT             lnkbt;
4646   Mat_Merge_SeqsToMPI *merge;
4647   PetscContainer      container;
4648 
4649   PetscFunctionBegin;
4650   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4651 
4652   /* make sure it is a PETSc comm */
4653   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4654   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4655   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4656 
4657   ierr = PetscNew(&merge);CHKERRQ(ierr);
4658   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4659 
4660   /* determine row ownership */
4661   /*---------------------------------------------------------*/
4662   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4663   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4664   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4665   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4666   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4667   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4668   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4669 
4670   m      = merge->rowmap->n;
4671   owners = merge->rowmap->range;
4672 
4673   /* determine the number of messages to send, their lengths */
4674   /*---------------------------------------------------------*/
4675   len_s = merge->len_s;
4676 
4677   len          = 0; /* length of buf_si[] */
4678   merge->nsend = 0;
4679   for (proc=0; proc<size; proc++) {
4680     len_si[proc] = 0;
4681     if (proc == rank) {
4682       len_s[proc] = 0;
4683     } else {
4684       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4685       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4686     }
4687     if (len_s[proc]) {
4688       merge->nsend++;
4689       nrows = 0;
4690       for (i=owners[proc]; i<owners[proc+1]; i++) {
4691         if (ai[i+1] > ai[i]) nrows++;
4692       }
4693       len_si[proc] = 2*(nrows+1);
4694       len         += len_si[proc];
4695     }
4696   }
4697 
4698   /* determine the number and length of messages to receive for ij-structure */
4699   /*-------------------------------------------------------------------------*/
4700   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4701   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4702 
4703   /* post the Irecv of j-structure */
4704   /*-------------------------------*/
4705   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4706   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4707 
4708   /* post the Isend of j-structure */
4709   /*--------------------------------*/
4710   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4711 
4712   for (proc=0, k=0; proc<size; proc++) {
4713     if (!len_s[proc]) continue;
4714     i    = owners[proc];
4715     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4716     k++;
4717   }
4718 
4719   /* receives and sends of j-structure are complete */
4720   /*------------------------------------------------*/
4721   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4722   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4723 
4724   /* send and recv i-structure */
4725   /*---------------------------*/
4726   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4727   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4728 
4729   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4730   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4731   for (proc=0,k=0; proc<size; proc++) {
4732     if (!len_s[proc]) continue;
4733     /* form outgoing message for i-structure:
4734          buf_si[0]:                 nrows to be sent
4735                [1:nrows]:           row index (global)
4736                [nrows+1:2*nrows+1]: i-structure index
4737     */
4738     /*-------------------------------------------*/
4739     nrows       = len_si[proc]/2 - 1;
4740     buf_si_i    = buf_si + nrows+1;
4741     buf_si[0]   = nrows;
4742     buf_si_i[0] = 0;
4743     nrows       = 0;
4744     for (i=owners[proc]; i<owners[proc+1]; i++) {
4745       anzi = ai[i+1] - ai[i];
4746       if (anzi) {
4747         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4748         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4749         nrows++;
4750       }
4751     }
4752     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4753     k++;
4754     buf_si += len_si[proc];
4755   }
4756 
4757   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4758   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4759 
4760   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4761   for (i=0; i<merge->nrecv; i++) {
4762     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4763   }
4764 
4765   ierr = PetscFree(len_si);CHKERRQ(ierr);
4766   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4767   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4768   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4769   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4770   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4771   ierr = PetscFree(status);CHKERRQ(ierr);
4772 
4773   /* compute a local seq matrix in each processor */
4774   /*----------------------------------------------*/
4775   /* allocate bi array and free space for accumulating nonzero column info */
4776   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4777   bi[0] = 0;
4778 
4779   /* create and initialize a linked list */
4780   nlnk = N+1;
4781   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4782 
4783   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4784   len  = ai[owners[rank+1]] - ai[owners[rank]];
4785   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4786 
4787   current_space = free_space;
4788 
4789   /* determine symbolic info for each local row */
4790   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4791 
4792   for (k=0; k<merge->nrecv; k++) {
4793     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4794     nrows       = *buf_ri_k[k];
4795     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4796     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure  */
4797   }
4798 
4799   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4800   len  = 0;
4801   for (i=0; i<m; i++) {
4802     bnzi = 0;
4803     /* add local non-zero cols of this proc's seqmat into lnk */
4804     arow  = owners[rank] + i;
4805     anzi  = ai[arow+1] - ai[arow];
4806     aj    = a->j + ai[arow];
4807     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4808     bnzi += nlnk;
4809     /* add received col data into lnk */
4810     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4811       if (i == *nextrow[k]) { /* i-th row */
4812         anzi  = *(nextai[k]+1) - *nextai[k];
4813         aj    = buf_rj[k] + *nextai[k];
4814         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4815         bnzi += nlnk;
4816         nextrow[k]++; nextai[k]++;
4817       }
4818     }
4819     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4820 
4821     /* if free space is not available, make more free space */
4822     if (current_space->local_remaining<bnzi) {
4823       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4824       nspacedouble++;
4825     }
4826     /* copy data into free space, then initialize lnk */
4827     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4828     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4829 
4830     current_space->array           += bnzi;
4831     current_space->local_used      += bnzi;
4832     current_space->local_remaining -= bnzi;
4833 
4834     bi[i+1] = bi[i] + bnzi;
4835   }
4836 
4837   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4838 
4839   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4840   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4841   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4842 
4843   /* create symbolic parallel matrix B_mpi */
4844   /*---------------------------------------*/
4845   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4846   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4847   if (n==PETSC_DECIDE) {
4848     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4849   } else {
4850     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4851   }
4852   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4853   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4854   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4855   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4856   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4857 
4858   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4859   B_mpi->assembled  = PETSC_FALSE;
4860   merge->bi         = bi;
4861   merge->bj         = bj;
4862   merge->buf_ri     = buf_ri;
4863   merge->buf_rj     = buf_rj;
4864   merge->coi        = NULL;
4865   merge->coj        = NULL;
4866   merge->owners_co  = NULL;
4867 
4868   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4869 
4870   /* attach the supporting struct to B_mpi for reuse */
4871   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4872   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4873   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4874   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4875   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4876   *mpimat = B_mpi;
4877 
4878   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4879   PetscFunctionReturn(0);
4880 }
4881 
4882 /*@C
4883       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4884                  matrices from each processor
4885 
4886     Collective
4887 
4888    Input Parameters:
4889 +    comm - the communicator the parallel matrix will live on
4890 .    seqmat - the input sequential matrix on each process
4891 .    m - number of local rows (or PETSC_DECIDE)
4892 .    n - number of local columns (or PETSC_DECIDE)
4893 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4894 
4895    Output Parameter:
4896 .    mpimat - the parallel matrix generated
4897 
4898     Level: advanced
4899 
4900    Notes:
4901      The dimensions of the sequential matrix in each processor MUST be the same.
4902      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4903      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
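
     A calling sketch (each process contributes its own seqmat; error checking omitted):

.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* reuse the symbolic data when only the numerical values of seqmat have changed */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve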
4904 @*/
4905 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4906 {
4907   PetscErrorCode ierr;
4908   PetscMPIInt    size;
4909 
4910   PetscFunctionBegin;
4911   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4912   if (size == 1) {
4913     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4914     if (scall == MAT_INITIAL_MATRIX) {
4915       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4916     } else {
4917       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4918     }
4919     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920     PetscFunctionReturn(0);
4921   }
4922   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4923   if (scall == MAT_INITIAL_MATRIX) {
4924     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4925   }
4926   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4927   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4928   PetscFunctionReturn(0);
4929 }
4930 
4931 /*@
4932      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4933           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4934           with MatGetSize()
4935 
4936     Not Collective
4937 
4938    Input Parameters:
4939 +    A - the matrix
4940 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4941 
4942    Output Parameter:
4943 .    A_loc - the local sequential matrix generated
4944 
4945     Level: developer
4946 
4947    Notes:
4948      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4949      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4950      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4951      modify the values of the returned A_loc.
4952 
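     A typical usage sketch (error checking omitted):

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     /* ... A is modified numerically with an unchanged nonzero pattern ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
     MatDestroy(&Aloc);
.ve
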
4953 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4954 
4955 @*/
4956 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4957 {
4958   PetscErrorCode ierr;
4959   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4960   Mat_SeqAIJ     *mat,*a,*b;
4961   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4962   MatScalar      *aa,*ba,*cam;
4963   PetscScalar    *ca;
4964   PetscMPIInt    size;
4965   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4966   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4967   PetscBool      match;
4968 
4969   PetscFunctionBegin;
4970   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4971   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4972   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4973   if (size == 1) {
4974     if (scall == MAT_INITIAL_MATRIX) {
4975       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4976       *A_loc = mpimat->A;
4977     } else if (scall == MAT_REUSE_MATRIX) {
4978       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4979     }
4980     PetscFunctionReturn(0);
4981   }
4982 
4983   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4984   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4985   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4986   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4987   aa = a->a; ba = b->a;
4988   if (scall == MAT_INITIAL_MATRIX) {
4989     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4990     ci[0] = 0;
4991     for (i=0; i<am; i++) {
4992       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4993     }
4994     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4995     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4996     k    = 0;
4997     for (i=0; i<am; i++) {
4998       ncols_o = bi[i+1] - bi[i];
4999       ncols_d = ai[i+1] - ai[i];
5000       /* off-diagonal portion of A */
5001       for (jo=0; jo<ncols_o; jo++) {
5002         col = cmap[*bj];
5003         if (col >= cstart) break;
5004         cj[k]   = col; bj++;
5005         ca[k++] = *ba++;
5006       }
5007       /* diagonal portion of A */
5008       for (j=0; j<ncols_d; j++) {
5009         cj[k]   = cstart + *aj++;
5010         ca[k++] = *aa++;
5011       }
5012       /* off-diagonal portion of A */
5013       for (j=jo; j<ncols_o; j++) {
5014         cj[k]   = cmap[*bj++];
5015         ca[k++] = *ba++;
5016       }
5017     }
5018     /* put together the new matrix */
5019     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5020     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5021     /* Since these are PETSc arrays, change flags to free them as necessary. */
5022     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5023     mat->free_a  = PETSC_TRUE;
5024     mat->free_ij = PETSC_TRUE;
5025     mat->nonew   = 0;
5026   } else if (scall == MAT_REUSE_MATRIX) {
5027     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5028     ci = mat->i; cj = mat->j; cam = mat->a;
5029     for (i=0; i<am; i++) {
5030       /* off-diagonal portion of A */
5031       ncols_o = bi[i+1] - bi[i];
5032       for (jo=0; jo<ncols_o; jo++) {
5033         col = cmap[*bj];
5034         if (col >= cstart) break;
5035         *cam++ = *ba++; bj++;
5036       }
5037       /* diagonal portion of A */
5038       ncols_d = ai[i+1] - ai[i];
5039       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5040       /* off-diagonal portion of A */
5041       for (j=jo; j<ncols_o; j++) {
5042         *cam++ = *ba++; bj++;
5043       }
5044     }
5045   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5046   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5047   PetscFunctionReturn(0);
5048 }
5049 
5050 /*@C
5051      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5052 
5053     Not Collective
5054 
5055    Input Parameters:
5056 +    A - the matrix
5057 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5058 -    row, col - index sets of rows and columns to extract (or NULL)
5059 
5060    Output Parameter:
5061 .    A_loc - the local sequential matrix generated
5062 
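   Example usage (a sketch; A is an assembled MATMPIAIJ matrix, and passing NULL for row and col lets the routine build the index sets itself):

      Mat Aloc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
      ... work with the condensed local matrix Aloc ...
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
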
5063     Level: developer
5064 
5065 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5066 
5067 @*/
5068 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5069 {
5070   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5071   PetscErrorCode ierr;
5072   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5073   IS             isrowa,iscola;
5074   Mat            *aloc;
5075   PetscBool      match;
5076 
5077   PetscFunctionBegin;
5078   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5079   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5080   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5081   if (!row) {
5082     start = A->rmap->rstart; end = A->rmap->rend;
5083     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5084   } else {
5085     isrowa = *row;
5086   }
5087   if (!col) {
5088     start = A->cmap->rstart;
5089     cmap  = a->garray;
5090     nzA   = a->A->cmap->n;
5091     nzB   = a->B->cmap->n;
5092     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5093     ncols = 0;
5094     for (i=0; i<nzB; i++) {
5095       if (cmap[i] < start) idx[ncols++] = cmap[i];
5096       else break;
5097     }
5098     imark = i;
5099     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5100     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5101     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5102   } else {
5103     iscola = *col;
5104   }
5105   if (scall != MAT_INITIAL_MATRIX) {
5106     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5107     aloc[0] = *A_loc;
5108   }
5109   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5110   if (!col) { /* attach global id of condensed columns */
5111     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5112   }
5113   *A_loc = aloc[0];
5114   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5115   if (!row) {
5116     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5117   }
5118   if (!col) {
5119     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5120   }
5121   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5122   PetscFunctionReturn(0);
5123 }
5124 
5125 /*
5126  * Create a sequential AIJ matrix based on row indices; a whole row (all of its columns) is extracted once a row index is matched.
5127  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5128  * on a global size.
5129  * */
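/*
 * A minimal sketch of the PetscSF pattern used below (the names nlocal, nwanted, remote, rootdata
 * and leafdata are illustrative, not part of this file): each process owns 'nlocal' roots (its rows
 * of P) and requests 'nwanted' leaves (possibly remote rows); a broadcast then moves data from the
 * owning roots to the requesting leaves.
 *
 *   PetscSF     sf;
 *   PetscSFNode *remote;
 *   ierr = PetscCalloc1(nwanted,&remote);CHKERRQ(ierr);
 *   ... fill remote[i].rank and remote[i].index with the owner and local index of each wanted row ...
 *   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
 *   ierr = PetscSFSetGraph(sf,nlocal,nwanted,NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
 *   ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
 *   ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
 *   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
 */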
5130 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5131 {
5132   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5133   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5134   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5135   PetscMPIInt              owner;
5136   PetscSFNode              *iremote,*oiremote;
5137   const PetscInt           *lrowindices;
5138   PetscErrorCode           ierr;
5139   PetscSF                  sf,osf;
5140   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5141   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5142   MPI_Comm                 comm;
5143   ISLocalToGlobalMapping   mapping;
5144 
5145   PetscFunctionBegin;
5146   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5147   /* plocalsize is the number of roots
5148    * nrows is the number of leaves
5149    * */
5150   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5151   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5152   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5153   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5154   for (i=0;i<nrows;i++) {
5155     /* Find a remote index and an owner for a row
5156      * The row could be local or remote
5157      * */
5158     owner = 0;
5159     lidx  = 0;
5160     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5161     iremote[i].index = lidx;
5162     iremote[i].rank  = owner;
5163   }
5164   /* Create SF to communicate how many nonzero columns for each row */
5165   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5166    * SF will figure out the number of nonzero columns for each row, and their
5167    * offsets
5168    * */
5169   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5170   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5171   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5172 
5173   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5174   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5175   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5176   roffsets[0] = 0;
5177   roffsets[1] = 0;
5178   for (i=0;i<plocalsize;i++) {
5179     /* diag */
5180     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5181     /* off diag */
5182     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5183     /* compute offsets so that we know the relative location of each row */
5184     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5185     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5186   }
5187   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5188   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5189   /* 'r' means root, and 'l' means leaf */
5190   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5191   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5192   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5193   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5194   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5195   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5196   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5197   dntotalcols = 0;
5198   ontotalcols = 0;
5199   ncol = 0;
5200   for (i=0;i<nrows;i++) {
5201     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5202     ncol = PetscMax(pnnz[i],ncol);
5203     /* diag */
5204     dntotalcols += nlcols[i*2+0];
5205     /* off diag */
5206     ontotalcols += nlcols[i*2+1];
5207   }
5208   /* We do not need to figure out the exact number of columns
5209    * since all the calculations are done by going through the raw data
5210    * */
5211   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5212   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5213   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5214   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5215   /* diag */
5216   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5217   /* off diag */
5218   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5219   /* diag */
5220   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5221   /* off diag */
5222   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5223   dntotalcols = 0;
5224   ontotalcols = 0;
5225   ntotalcols  = 0;
5226   for (i=0;i<nrows;i++) {
5227     owner = 0;
5228     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5229     /* Set iremote for diag matrix */
5230     for (j=0;j<nlcols[i*2+0];j++) {
5231       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5232       iremote[dntotalcols].rank    = owner;
5233       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5234       ilocal[dntotalcols++]        = ntotalcols++;
5235     }
5236     /* off diag */
5237     for (j=0;j<nlcols[i*2+1];j++) {
5238       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5239       oiremote[ontotalcols].rank    = owner;
5240       oilocal[ontotalcols++]        = ntotalcols++;
5241     }
5242   }
5243   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5244   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5245   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5246   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5247   /* P serves as the roots and P_oth as the leaves
5248    * Diag matrix
5249    * */
5250   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5251   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5252   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5253 
5254   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5255   /* Off diag */
5256   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5257   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5258   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5259   /* We operate on the matrix internal data to save memory */
5260   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5261   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5262   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5263   /* Convert to global indices for diag matrix */
5264   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5265   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5266   /* We want P_oth to store global indices */
5267   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5268   /* Use memory scalable approach */
5269   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5270   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5271   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5272   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5273   /* Convert back to local indices */
5274   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5275   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5276   nout = 0;
5277   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5278   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5279   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5280   /* Exchange values */
5281   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5282   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5283   /* Stop PETSc from shrinking memory */
5284   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5285   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5286   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5287   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5288   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5289   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5290   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5291   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5292   PetscFunctionReturn(0);
5293 }
5294 
5295 /*
5296  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5297  * This supports MPIAIJ and MAIJ
5298  * */
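/*
 * A small sketch of the PetscHMapI de-duplication idiom used in the MAT_INITIAL_MATRIX branch
 * below (the names keys and n are illustrative, not part of this file): a key is inserted only
 * the first time it is seen, so the map ends up holding the set of unique keys.
 *
 *   PetscHMapI ht;
 *   PetscBool  has;
 *   PetscInt   i,count = 0;
 *   ierr = PetscHMapICreate(&ht);CHKERRQ(ierr);
 *   for (i=0; i<n; i++) {
 *     ierr = PetscHMapIHas(ht,keys[i],&has);CHKERRQ(ierr);
 *     if (!has) {ierr = PetscHMapISet(ht,keys[i],count++);CHKERRQ(ierr);}
 *   }
 *   ierr = PetscHMapIDestroy(&ht);CHKERRQ(ierr);
 */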
5299 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5300 {
5301   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5302   Mat_SeqAIJ            *p_oth;
5303   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5304   IS                    rows,map;
5305   PetscHMapI            hamp;
5306   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5307   MPI_Comm              comm;
5308   PetscSF               sf,osf;
5309   PetscBool             has;
5310   PetscErrorCode        ierr;
5311 
5312   PetscFunctionBegin;
5313   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5314   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5315   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5316    *  and then create a submatrix (that often is an overlapping matrix)
5317    * */
5318   if (reuse == MAT_INITIAL_MATRIX) {
5319     /* Use a hash table to figure out unique keys */
5320     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5321     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5322     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5323     count = 0;
5324     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5325     for (i=0;i<a->B->cmap->n;i++) {
5326       key  = a->garray[i]/dof;
5327       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5328       if (!has) {
5329         mapping[i] = count;
5330         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5331       } else {
5332         /* The current 'i' maps to the same key as the previous step */
5333         mapping[i] = count-1;
5334       }
5335     }
5336     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5337     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5338     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5339     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5340     off = 0;
5341     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5342     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5343     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5344     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5345     /* In case the matrix was already created and the user wants to recreate it */
5346     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5347     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5348     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5349     ierr = ISDestroy(&map);CHKERRQ(ierr);
5350     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5351   } else if (reuse == MAT_REUSE_MATRIX) {
5352     /* If the matrix was already created, we simply update the values using the SF objects
5353      * that were attached to the matrix earlier.
5354      *  */
5355     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5356     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5357     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5358     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5359     /* Update values in place */
5360     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5361     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5362     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5363     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5364   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5365   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5366   PetscFunctionReturn(0);
5367 }
5368 
5369 /*@C
5370     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5371 
5372     Collective on Mat
5373 
5374    Input Parameters:
5375 +    A,B - the matrices in mpiaij format
5376 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5377 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5378 
5379    Output Parameter:
5380 +    rowb, colb - index sets of rows and columns of B to extract
5381 -    B_seq - the sequential matrix generated
5382 
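   Example usage (a sketch; A and B are assembled MATMPIAIJ matrices with compatible layouts, and the index sets and B_seq created by the first call are reused by the second):

      IS  rowb = NULL,colb = NULL;
      Mat Bseq = NULL;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ... change the numerical values of B, then refresh Bseq ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
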
5383     Level: developer
5384 
5385 @*/
5386 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5387 {
5388   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5389   PetscErrorCode ierr;
5390   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5391   IS             isrowb,iscolb;
5392   Mat            *bseq=NULL;
5393 
5394   PetscFunctionBegin;
5395   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5396     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5397   }
5398   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5399 
5400   if (scall == MAT_INITIAL_MATRIX) {
5401     start = A->cmap->rstart;
5402     cmap  = a->garray;
5403     nzA   = a->A->cmap->n;
5404     nzB   = a->B->cmap->n;
5405     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5406     ncols = 0;
5407     for (i=0; i<nzB; i++) {  /* row < local row index */
5408       if (cmap[i] < start) idx[ncols++] = cmap[i];
5409       else break;
5410     }
5411     imark = i;
5412     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5413     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5414     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5415     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5416   } else {
5417     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5418     isrowb  = *rowb; iscolb = *colb;
5419     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5420     bseq[0] = *B_seq;
5421   }
5422   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5423   *B_seq = bseq[0];
5424   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5425   if (!rowb) {
5426     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5427   } else {
5428     *rowb = isrowb;
5429   }
5430   if (!colb) {
5431     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5432   } else {
5433     *colb = iscolb;
5434   }
5435   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5436   PetscFunctionReturn(0);
5437 }
5438 
5439 /*
5440     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5441     of the OFF-DIAGONAL portion of local A
5442 
5443     Collective on Mat
5444 
5445    Input Parameters:
5446 +    A,B - the matrices in mpiaij format
5447 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5448 
5449    Output Parameter:
5450 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5451 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5452 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5453 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5454 
5455     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5456      for this matrix. This is not desirable.
5457 
5458     Level: developer
5459 
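    Example usage (a sketch; the startsj and bufa buffers returned by the first call are saved by the caller and passed back unchanged for the reuse call):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... change the numerical values of B, then refresh B_oth ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
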
5460 */
5461 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5462 {
5463   PetscErrorCode         ierr;
5464   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5465   Mat_SeqAIJ             *b_oth;
5466   VecScatter             ctx;
5467   MPI_Comm               comm;
5468   const PetscMPIInt      *rprocs,*sprocs;
5469   const PetscInt         *srow,*rstarts,*sstarts;
5470   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5471   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5472   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5473   MPI_Request            *rwaits = NULL,*swaits = NULL;
5474   MPI_Status             rstatus;
5475   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5476 
5477   PetscFunctionBegin;
5478   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5479   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5480 
5481   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5482     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5483   }
5484   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5485   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5486 
5487   if (size == 1) {
5488     startsj_s = NULL;
5489     bufa_ptr  = NULL;
5490     *B_oth    = NULL;
5491     PetscFunctionReturn(0);
5492   }
5493 
5494   ctx = a->Mvctx;
5495   tag = ((PetscObject)ctx)->tag;
5496 
5497   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5498   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5499   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5500   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5501   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5502   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5503   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5504 
5505   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5506   if (scall == MAT_INITIAL_MATRIX) {
5507     /* i-array */
5508     /*---------*/
5509     /*  post receives */
5510     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5511     for (i=0; i<nrecvs; i++) {
5512       rowlen = rvalues + rstarts[i]*rbs;
5513       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5514       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5515     }
5516 
5517     /* pack the outgoing message */
5518     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5519 
5520     sstartsj[0] = 0;
5521     rstartsj[0] = 0;
5522     len         = 0; /* total length of j or a array to be sent */
5523     if (nsends) {
5524       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5525       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5526     }
5527     for (i=0; i<nsends; i++) {
5528       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5529       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5530       for (j=0; j<nrows; j++) {
5531         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5532         for (l=0; l<sbs; l++) {
5533           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5534 
5535           rowlen[j*sbs+l] = ncols;
5536 
5537           len += ncols;
5538           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5539         }
5540         k++;
5541       }
5542       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5543 
5544       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5545     }
5546     /* recvs and sends of i-array are completed */
5547     i = nrecvs;
5548     while (i--) {
5549       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5550     }
5551     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5552     ierr = PetscFree(svalues);CHKERRQ(ierr);
5553 
5554     /* allocate buffers for sending j and a arrays */
5555     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5556     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5557 
5558     /* create i-array of B_oth */
5559     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5560 
5561     b_othi[0] = 0;
5562     len       = 0; /* total length of j or a array to be received */
5563     k         = 0;
5564     for (i=0; i<nrecvs; i++) {
5565       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5566       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5567       for (j=0; j<nrows; j++) {
5568         b_othi[k+1] = b_othi[k] + rowlen[j];
5569         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5570         k++;
5571       }
5572       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5573     }
5574     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5575 
5576     /* allocate space for j and a arrays of B_oth */
5577     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5578     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5579 
5580     /* j-array */
5581     /*---------*/
5582     /*  post receives of j-array */
5583     for (i=0; i<nrecvs; i++) {
5584       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5585       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5586     }
5587 
5588     /* pack the outgoing message j-array */
5589     if (nsends) k = sstarts[0];
5590     for (i=0; i<nsends; i++) {
5591       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5592       bufJ  = bufj+sstartsj[i];
5593       for (j=0; j<nrows; j++) {
5594         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5595         for (ll=0; ll<sbs; ll++) {
5596           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5597           for (l=0; l<ncols; l++) {
5598             *bufJ++ = cols[l];
5599           }
5600           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5601         }
5602       }
5603       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5604     }
5605 
5606     /* recvs and sends of j-array are completed */
5607     i = nrecvs;
5608     while (i--) {
5609       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5610     }
5611     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5612   } else if (scall == MAT_REUSE_MATRIX) {
5613     sstartsj = *startsj_s;
5614     rstartsj = *startsj_r;
5615     bufa     = *bufa_ptr;
5616     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5617     b_otha   = b_oth->a;
5618   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unknown MatReuse type");
5619 
5620   /* a-array */
5621   /*---------*/
5622   /*  post receives of a-array */
5623   for (i=0; i<nrecvs; i++) {
5624     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5625     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5626   }
5627 
5628   /* pack the outgoing message a-array */
5629   if (nsends) k = sstarts[0];
5630   for (i=0; i<nsends; i++) {
5631     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5632     bufA  = bufa+sstartsj[i];
5633     for (j=0; j<nrows; j++) {
5634       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5635       for (ll=0; ll<sbs; ll++) {
5636         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5637         for (l=0; l<ncols; l++) {
5638           *bufA++ = vals[l];
5639         }
5640         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5641       }
5642     }
5643     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5644   }
5645   /* recvs and sends of a-array are completed */
5646   i = nrecvs;
5647   while (i--) {
5648     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5649   }
5650   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5651   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5652 
5653   if (scall == MAT_INITIAL_MATRIX) {
5654     /* put together the new matrix */
5655     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5656 
5657     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5658     /* Since these are PETSc arrays, change flags to free them as necessary. */
5659     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5660     b_oth->free_a  = PETSC_TRUE;
5661     b_oth->free_ij = PETSC_TRUE;
5662     b_oth->nonew   = 0;
5663 
5664     ierr = PetscFree(bufj);CHKERRQ(ierr);
5665     if (!startsj_s || !bufa_ptr) {
5666       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5667       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5668     } else {
5669       *startsj_s = sstartsj;
5670       *startsj_r = rstartsj;
5671       *bufa_ptr  = bufa;
5672     }
5673   }
5674 
5675   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5676   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5677   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5678   PetscFunctionReturn(0);
5679 }
5680 
5681 /*@C
5682   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5683 
5684   Not Collective
5685 
5686   Input Parameters:
5687 . A - The matrix in mpiaij format
5688 
5689   Output Parameter:
5690 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5691 . colmap - A map from global column index to local index into lvec
5692 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5693 
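  Example usage (a sketch; the returned objects are borrowed references into the MATMPIAIJ data structure and must not be destroyed by the caller):

#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     Vec        lvec;
     VecScatter scat;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
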
5694   Level: developer
5695 
5696 @*/
5697 #if defined(PETSC_USE_CTABLE)
5698 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5699 #else
5700 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5701 #endif
5702 {
5703   Mat_MPIAIJ *a;
5704 
5705   PetscFunctionBegin;
5706   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5707   PetscValidPointer(lvec, 2);
5708   PetscValidPointer(colmap, 3);
5709   PetscValidPointer(multScatter, 4);
5710   a = (Mat_MPIAIJ*) A->data;
5711   if (lvec) *lvec = a->lvec;
5712   if (colmap) *colmap = a->colmap;
5713   if (multScatter) *multScatter = a->Mvctx;
5714   PetscFunctionReturn(0);
5715 }
5716 
5717 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5718 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5719 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5720 #if defined(PETSC_HAVE_MKL_SPARSE)
5721 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5722 #endif
5723 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5724 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5725 #if defined(PETSC_HAVE_ELEMENTAL)
5726 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5727 #endif
5728 #if defined(PETSC_HAVE_HYPRE)
5729 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5730 #endif
5731 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5732 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5733 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5734 
5735 /*
5736     Computes (B'*A')' since computing B*A directly is untenable
5737 
5738                n                       p                          p
5739         (              )       (              )         (                  )
5740       m (      A       )  *  n (       B      )   =   m (         C        )
5741         (              )       (              )         (                  )
5742 
5743 */
5744 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5745 {
5746   PetscErrorCode ierr;
5747   Mat            At,Bt,Ct;
5748 
5749   PetscFunctionBegin;
5750   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5751   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5752   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5753   ierr = MatDestroy(&At);CHKERRQ(ierr);
5754   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5755   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5756   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5757   PetscFunctionReturn(0);
5758 }
5759 
5760 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5761 {
5762   PetscErrorCode ierr;
5763   PetscBool      cisdense;
5764 
5765   PetscFunctionBegin;
5766   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5767   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5768   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5769   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5770   if (!cisdense) {
5771     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5772   }
5773   ierr = MatSetUp(C);CHKERRQ(ierr);
5774 
5775   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5776   PetscFunctionReturn(0);
5777 }
5778 
5779 /* ----------------------------------------------------------------*/
5780 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5781 {
5782   Mat_Product *product = C->product;
5783   Mat         A = product->A,B=product->B;
5784 
5785   PetscFunctionBegin;
5786   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5787     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5788 
5789   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5790   C->ops->productsymbolic = MatProductSymbolic_AB;
5791   PetscFunctionReturn(0);
5792 }
5793 
5794 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5795 {
5796   PetscErrorCode ierr;
5797   Mat_Product    *product = C->product;
5798 
5799   PetscFunctionBegin;
5800   if (product->type == MATPRODUCT_AB) {
5801     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5802   }
5803   PetscFunctionReturn(0);
5804 }
5805 /* ----------------------------------------------------------------*/
5806 
5807 /*MC
5808    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5809 
5810    Options Database Keys:
5811 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5812 
5813    Level: beginner
5814 
5815    Notes:
5816     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5817     in this case the values associated with the rows and columns one passes in are set to zero
5818     in the matrix.
5819 
5820     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5821     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
5822 
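    A typical creation sequence (a sketch; comm, m, n, M, N, d_nz and o_nz are placeholders chosen by the application):

       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
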
5823 .seealso: MatCreateAIJ()
5824 M*/
5825 
5826 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5827 {
5828   Mat_MPIAIJ     *b;
5829   PetscErrorCode ierr;
5830   PetscMPIInt    size;
5831 
5832   PetscFunctionBegin;
5833   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5834 
5835   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5836   B->data       = (void*)b;
5837   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5838   B->assembled  = PETSC_FALSE;
5839   B->insertmode = NOT_SET_VALUES;
5840   b->size       = size;
5841 
5842   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5843 
5844   /* build cache for off array entries formed */
5845   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5846 
5847   b->donotstash  = PETSC_FALSE;
5848   b->colmap      = 0;
5849   b->garray      = 0;
5850   b->roworiented = PETSC_TRUE;
5851 
5852   /* stuff used for matrix vector multiply */
5853   b->lvec  = NULL;
5854   b->Mvctx = NULL;
5855 
5856   /* stuff for MatGetRow() */
5857   b->rowindices   = 0;
5858   b->rowvalues    = 0;
5859   b->getrowactive = PETSC_FALSE;
5860 
5861   /* flexible pointer used in CUSP/CUSPARSE classes */
5862   b->spptr = NULL;
5863 
5864   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5865   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5866   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5867   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5868   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5869   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5870   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5871   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5872   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5873   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5874 #if defined(PETSC_HAVE_MKL_SPARSE)
5875   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5876 #endif
5877   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5878   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5879   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5880 #if defined(PETSC_HAVE_ELEMENTAL)
5881   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5882 #endif
5883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5884   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5885 #if defined(PETSC_HAVE_HYPRE)
5886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5887   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5888 #endif
5889   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5890   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5891   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5892   PetscFunctionReturn(0);
5893 }
5894 
5895 /*@C
5896      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5897          and "off-diagonal" part of the matrix in CSR format.
5898 
5899    Collective
5900 
5901    Input Parameters:
5902 +  comm - MPI communicator
5903 .  m - number of local rows (Cannot be PETSC_DECIDE)
5904 .  n - This value should be the same as the local size used in creating the
5905        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5906        calculated if N is given). For square matrices n is almost always m.
5907 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5908 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5909 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5910 .   j - column indices
5911 .   a - matrix values
5912 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5913 .   oj - column indices
5914 -   oa - matrix values
5915 
5916    Output Parameter:
5917 .   mat - the matrix
5918 
5919    Level: advanced
5920 
5921    Notes:
5922        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5923        must free the arrays once the matrix has been destroyed and not before.
5924 
5925        The i and j indices are 0 based
5926 
5927        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5928 
5929        This sets local rows and cannot be used to set off-processor values.
5930 
5931        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5932        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5933        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5934        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5935        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5936        communication if it is known that only local entries will be set.
5937 
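       A call sketch (i, j, a, oi, oj and oa are the caller-owned CSR arrays described above and must remain valid until the matrix is destroyed):

          Mat A;
          ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
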
5938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5939           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5940 @*/
5941 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5942 {
5943   PetscErrorCode ierr;
5944   Mat_MPIAIJ     *maij;
5945 
5946   PetscFunctionBegin;
5947   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5948   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5949   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5950   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5951   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5952   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5953   maij = (Mat_MPIAIJ*) (*mat)->data;
5954 
5955   (*mat)->preallocated = PETSC_TRUE;
5956 
5957   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5958   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5959 
5960   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5961   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5962 
5963   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5964   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5965   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5966   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5967 
5968   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5969   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5970   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5971   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5972   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5973   PetscFunctionReturn(0);
5974 }
5975 
5976 /*
5977     Special version for direct calls from Fortran
5978 */
5979 #include <petsc/private/fortranimpl.h>
5980 
5981 /* Change these macros so they can be used in a void function */
5982 #undef CHKERRQ
5983 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5984 #undef SETERRQ2
5985 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5986 #undef SETERRQ3
5987 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5988 #undef SETERRQ
5989 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5990 
5991 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5992 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5993 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5994 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5995 #else
5996 #endif
5997 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5998 {
5999   Mat            mat  = *mmat;
6000   PetscInt       m    = *mm, n = *mn;
6001   InsertMode     addv = *maddv;
6002   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6003   PetscScalar    value;
6004   PetscErrorCode ierr;
6005 
6006   MatCheckPreallocated(mat,1);
6007   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6008   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6009   {
6010     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6011     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6012     PetscBool roworiented = aij->roworiented;
6013 
6014     /* Some Variables required in the macro */
6015     Mat        A                    = aij->A;
6016     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6017     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6018     MatScalar  *aa                  = a->a;
6019     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6020     Mat        B                    = aij->B;
6021     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6022     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6023     MatScalar  *ba                  = b->a;
6024     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6025      * cannot use "#if defined" inside a macro. */
6026     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6027 
6028     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6029     PetscInt  nonew = a->nonew;
6030     MatScalar *ap1,*ap2;
6031 
6032     PetscFunctionBegin;
6033     for (i=0; i<m; i++) {
6034       if (im[i] < 0) continue;
6035       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6036       if (im[i] >= rstart && im[i] < rend) {
6037         row      = im[i] - rstart;
6038         lastcol1 = -1;
6039         rp1      = aj + ai[row];
6040         ap1      = aa + ai[row];
6041         rmax1    = aimax[row];
6042         nrow1    = ailen[row];
6043         low1     = 0;
6044         high1    = nrow1;
6045         lastcol2 = -1;
6046         rp2      = bj + bi[row];
6047         ap2      = ba + bi[row];
6048         rmax2    = bimax[row];
6049         nrow2    = bilen[row];
6050         low2     = 0;
6051         high2    = nrow2;
6052 
6053         for (j=0; j<n; j++) {
6054           if (roworiented) value = v[i*n+j];
6055           else value = v[i+j*m];
6056           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6057           if (in[j] >= cstart && in[j] < cend) {
6058             col = in[j] - cstart;
6059             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6060 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6061             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6062 #endif
6063           } else if (in[j] < 0) continue;
6064           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6065             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6066             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6067           } else {
6068             if (mat->was_assembled) {
6069               if (!aij->colmap) {
6070                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6071               }
6072 #if defined(PETSC_USE_CTABLE)
6073               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6074               col--;
6075 #else
6076               col = aij->colmap[in[j]] - 1;
6077 #endif
6078               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6079                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6080                 col  =  in[j];
6081                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6082                 B        = aij->B;
6083                 b        = (Mat_SeqAIJ*)B->data;
6084                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6085                 rp2      = bj + bi[row];
6086                 ap2      = ba + bi[row];
6087                 rmax2    = bimax[row];
6088                 nrow2    = bilen[row];
6089                 low2     = 0;
6090                 high2    = nrow2;
6091                 bm       = aij->B->rmap->n;
6092                 ba       = b->a;
6093                 inserted = PETSC_FALSE;
6094               }
6095             } else col = in[j];
6096             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6097 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6098             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6099 #endif
6100           }
6101         }
6102       } else if (!aij->donotstash) {
6103         if (roworiented) {
6104           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6105         } else {
6106           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6107         }
6108       }
6109     }
6110   }
6111   PetscFunctionReturnVoid();
6112 }
6113