#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
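
/*
   A minimal usage sketch (an illustration, not code taken from this file): create an AIJ
   matrix and, as recommended above, call both preallocation routines so the same code works
   on one or many processes.  The sizes m, n, M, N and the fill estimates d_nz and o_nz are
   placeholders.

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,d_nz,NULL);            (used on a single process)
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);  (used on multiple processes)
*/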

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool         cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  PetscErrorCode  ierr;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
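
    Typical call (a sketch; "dmat" here is a placeholder name for the distributed result):
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);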
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each process
has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure whether LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa                  = a->a;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba                  = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
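      /* off-process row: either raise an error (MAT_NO_OFF_PROC_ENTRIES was set), silently skip
         (donotstash), or stash the values so they can be communicated during assembly */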
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
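
    A small hypothetical example: with cstart = 2, cend = 4 and two local rows given by
    mat_i = {0,2,3}, mat_j = {2,5, 3}, row 0 stores local diagonal column 0 (= 2-cstart)
    and off-diagonal column 5, while row 1 stores local diagonal column 1 (= 3-cstart),
    so ailen = {1,1} and bilen = {1,0}.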
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
  Mat            A           = aij->A; /* diagonal part of the matrix */
  Mat            B           = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt       *ailen      = a->ilen,*aj = a->j;
  PetscInt       *bilen      = b->ilen,*bj = b->j;
  PetscInt       am          = aij->A->rmap->n,j;
  PetscInt       diag_so_far = 0,dnz;
  PetscInt       offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /*  Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  Mat            A      = aij->A; /* diagonal part of the matrix */
  Mat            B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;
  PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar    *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /*  Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

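  /* overlap communication with computation: start scattering the off-process entries of xx
     into a->lvec, multiply by the local diagonal block a->A meanwhile, then finish the
     scatter and apply the off-diagonal block a->B */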
1114   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1116   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1122 {
1123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135   VecScatter     Mvctx = a->Mvctx;
1136 
1137   PetscFunctionBegin;
1138   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1139   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1141   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1149   PetscErrorCode ierr;
1150 
1151   PetscFunctionBegin;
1152   /* do nondiagonal part */
1153   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1154   /* do local part */
1155   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1156   /* add partial results together */
1157   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1163 {
1164   MPI_Comm       comm;
1165   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1166   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1167   IS             Me,Notme;
1168   PetscErrorCode ierr;
1169   PetscInt       M,N,first,last,*notme,i;
1170   PetscBool      lf;
1171   PetscMPIInt    size;
1172 
1173   PetscFunctionBegin;
1174   /* Easy test: symmetric diagonal block */
1175   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1176   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1177   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1178   if (!*f) PetscFunctionReturn(0);
1179   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1181   if (size == 1) PetscFunctionReturn(0);
1182 
1183   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1184   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1185   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1186   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1187   for (i=0; i<first; i++) notme[i] = i;
1188   for (i=last; i<M; i++) notme[i-last+first] = i;
1189   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1190   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1191   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1192   Aoff = Aoffs[0];
1193   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1194   Boff = Boffs[0];
1195   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1197   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1199   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1200   ierr = PetscFree(notme);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1205 {
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1214 {
1215   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1216   PetscErrorCode ierr;
1217 
1218   PetscFunctionBegin;
1219   /* do nondiagonal part */
1220   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1221   /* do local part */
1222   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1223   /* add partial results together */
1224   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 /*
1230   This only works correctly for square matrices where the subblock A->A is the
1231    diagonal block
1232 */
1233 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1234 {
1235   PetscErrorCode ierr;
1236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1237 
1238   PetscFunctionBegin;
1239   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1240   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1241   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1246 {
1247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1252   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1257 {
1258   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262 #if defined(PETSC_USE_LOG)
1263   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1264 #endif
1265   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1266   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1268   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1269 #if defined(PETSC_USE_CTABLE)
1270   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1271 #else
1272   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1273 #endif
1274   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1275   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1276   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1277   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1278   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1279   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1280   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1281 
1282   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1292 #if defined(PETSC_HAVE_ELEMENTAL)
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1294 #endif
1295 #if defined(PETSC_HAVE_HYPRE)
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1298 #endif
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
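/*
   MatView_MPIAIJ_Binary - writes the parallel matrix in the PETSc binary format: a four
   entry header (MAT_FILE_CLASSID, global rows, global columns, global number of nonzeros),
   followed by the length of every row, then the global column indices of every row, then
   the numerical values in the same order.  All data is funneled (under flow control) to
   process 0, which performs the actual writes.
*/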
1304 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1305 {
1306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1307   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1308   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1309   PetscErrorCode ierr;
1310   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1311   int            fd;
1312   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1313   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1314   PetscScalar    *column_values;
1315   PetscInt       message_count,flowcontrolcount;
1316   FILE           *file;
1317 
1318   PetscFunctionBegin;
1319   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1320   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1321   nz   = A->nz + B->nz;
1322   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1323   if (!rank) {
1324     header[0] = MAT_FILE_CLASSID;
1325     header[1] = mat->rmap->N;
1326     header[2] = mat->cmap->N;
1327 
1328     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1329     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     /* get largest number of rows any processor has */
1331     rlen  = mat->rmap->n;
1332     range = mat->rmap->range;
1333     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1334   } else {
1335     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336     rlen = mat->rmap->n;
1337   }
1338 
1339   /* load up the local row counts */
1340   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1341   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1342 
1343   /* store the row lengths to the file */
1344   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1345   if (!rank) {
1346     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1347     for (i=1; i<size; i++) {
1348       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1349       rlen = range[i+1] - range[i];
1350       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1351       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     }
1353     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1354   } else {
1355     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1356     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1357     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1358   }
1359   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1360 
1361   /* load up the local column indices */
1362   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1363   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1364   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1365   cnt   = 0;
1366   for (i=0; i<mat->rmap->n; i++) {
1367     for (j=B->i[i]; j<B->i[i+1]; j++) {
1368       if ((col = garray[B->j[j]]) > cstart) break;
1369       column_indices[cnt++] = col;
1370     }
1371     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1372     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1373   }
1374   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1375 
1376   /* store the column indices to the file */
1377   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1378   if (!rank) {
1379     MPI_Status status;
1380     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1381     for (i=1; i<size; i++) {
1382       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1383       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1384       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1385       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1386       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1387     }
1388     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1389   } else {
1390     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1391     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1392     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1393     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1394   }
1395   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1396 
1397   /* load up the local column values */
1398   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1399   cnt  = 0;
1400   for (i=0; i<mat->rmap->n; i++) {
1401     for (j=B->i[i]; j<B->i[i+1]; j++) {
1402       if (garray[B->j[j]] > cstart) break;
1403       column_values[cnt++] = B->a[j];
1404     }
1405     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1406     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1407   }
1408   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1409 
1410   /* store the column values to the file */
1411   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1412   if (!rank) {
1413     MPI_Status status;
1414     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1415     for (i=1; i<size; i++) {
1416       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1417       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1418       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1419       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1420       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1421     }
1422     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1423   } else {
1424     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1425     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1426     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1427     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1428   }
1429   ierr = PetscFree(column_values);CHKERRQ(ierr);
1430 
1431   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1432   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #include <petscdraw.h>
1437 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1438 {
1439   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1440   PetscErrorCode    ierr;
1441   PetscMPIInt       rank = aij->rank,size = aij->size;
1442   PetscBool         isdraw,iascii,isbinary;
1443   PetscViewer       sviewer;
1444   PetscViewerFormat format;
1445 
1446   PetscFunctionBegin;
1447   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1450   if (iascii) {
1451     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1452     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1453       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1454       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1455       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1456       for (i=0; i<(PetscInt)size; i++) {
1457         nmax = PetscMax(nmax,nz[i]);
1458         nmin = PetscMin(nmin,nz[i]);
1459         navg += nz[i];
1460       }
1461       ierr = PetscFree(nz);CHKERRQ(ierr);
1462       navg = navg/size;
1463       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     }
1466     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1467     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1468       MatInfo   info;
1469       PetscBool inodes;
1470 
1471       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1472       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1473       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1475       if (!inodes) {
1476         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1477                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1478       } else {
1479         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1480                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1481       }
1482       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1483       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1484       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1485       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1486       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1488       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1489       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1490       PetscFunctionReturn(0);
1491     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1492       PetscInt inodecount,inodelimit,*inodes;
1493       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1494       if (inodes) {
1495         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1496       } else {
1497         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1498       }
1499       PetscFunctionReturn(0);
1500     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1501       PetscFunctionReturn(0);
1502     }
1503   } else if (isbinary) {
1504     if (size == 1) {
1505       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1506       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1507     } else {
1508       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1509     }
1510     PetscFunctionReturn(0);
1511   } else if (iascii && size == 1) {
1512     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1513     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1514     PetscFunctionReturn(0);
1515   } else if (isdraw) {
1516     PetscDraw draw;
1517     PetscBool isnull;
1518     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1519     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1520     if (isnull) PetscFunctionReturn(0);
1521   }
1522 
1523   { /* assemble the entire matrix onto first processor */
1524     Mat A = NULL, Av;
1525     IS  isrow,iscol;
1526 
1527     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1528     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1529     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1530     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1531 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1532 /*
1533     Mat *AA, A = NULL, Av;
1534     IS  isrow,iscol;
1535 
1536     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1537     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1538     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1539     if (!rank) {
1540        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1541        A    = AA[0];
1542        Av   = AA[0];
1543     }
1544     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1545 */
1546     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1547     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1548     /*
1549        Every process has to participate in drawing the matrix since the graphics waits are
1550        synchronized across all processes that share the PetscDraw object
1551     */
1552     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1553     if (!rank) {
1554       if (((PetscObject)mat)->name) {
1555         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1556       }
1557       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1558     }
1559     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1560     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1561     ierr = MatDestroy(&A);CHKERRQ(ierr);
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1567 {
1568   PetscErrorCode ierr;
1569   PetscBool      iascii,isdraw,issocket,isbinary;
1570 
1571   PetscFunctionBegin;
1572   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1573   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1574   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1575   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1576   if (iascii || isdraw || isbinary || issocket) {
1577     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1578   }
1579   PetscFunctionReturn(0);
1580 }
1581 
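/*
   MatSOR_MPIAIJ - only the "local" SOR variants are supported in parallel: each outer
   iteration scatters the current solution into the ghost vector lvec, folds the
   off-diagonal coupling into the right-hand side (bb1 = bb - B*lvec) and then applies SOR
   to the local diagonal block A.  The SOR_EISENSTAT branch builds the Eisenstat
   right-hand side instead.
*/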
1582 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1583 {
1584   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1585   PetscErrorCode ierr;
1586   Vec            bb1 = 0;
1587   PetscBool      hasop;
1588 
1589   PetscFunctionBegin;
1590   if (flag == SOR_APPLY_UPPER) {
1591     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1592     PetscFunctionReturn(0);
1593   }
1594 
1595   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1596     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1597   }
1598 
1599   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1600     if (flag & SOR_ZERO_INITIAL_GUESS) {
1601       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602       its--;
1603     }
1604 
1605     while (its--) {
1606       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608 
1609       /* update rhs: bb1 = bb - B*x */
1610       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1611       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1612 
1613       /* local sweep */
1614       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1615     }
1616   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1617     if (flag & SOR_ZERO_INITIAL_GUESS) {
1618       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1619       its--;
1620     }
1621     while (its--) {
1622       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1623       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624 
1625       /* update rhs: bb1 = bb - B*x */
1626       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1627       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1628 
1629       /* local sweep */
1630       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1631     }
1632   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1633     if (flag & SOR_ZERO_INITIAL_GUESS) {
1634       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1635       its--;
1636     }
1637     while (its--) {
1638       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1639       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640 
1641       /* update rhs: bb1 = bb - B*x */
1642       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1643       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1644 
1645       /* local sweep */
1646       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1647     }
1648   } else if (flag & SOR_EISENSTAT) {
1649     Vec xx1;
1650 
1651     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1652     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1653 
1654     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1655     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656     if (!mat->diag) {
1657       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1658       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1659     }
1660     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1661     if (hasop) {
1662       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1663     } else {
1664       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1665     }
1666     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1667 
1668     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1669 
1670     /* local sweep */
1671     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1672     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1673     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1674   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1675 
1676   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1677 
1678   matin->factorerrortype = mat->A->factorerrortype;
1679   PetscFunctionReturn(0);
1680 }
1681 
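/*
   MatPermute_MPIAIJ - the row and column permutations are inverted with PetscSF reductions
   to find where every locally owned row, column and ghost column ends up; diagonal and
   off-diagonal nonzero counts are then computed and communicated for preallocation of the
   permuted matrix, and the entries are finally inserted with MatSetValues().
*/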
1682 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1683 {
1684   Mat            aA,aB,Aperm;
1685   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1686   PetscScalar    *aa,*ba;
1687   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1688   PetscSF        rowsf,sf;
1689   IS             parcolp = NULL;
1690   PetscBool      done;
1691   PetscErrorCode ierr;
1692 
1693   PetscFunctionBegin;
1694   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1695   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1696   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1697   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1698 
1699   /* Invert row permutation to find out where my rows should go */
1700   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1701   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1702   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1703   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1704   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1705   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1706 
1707   /* Invert column permutation to find out where my columns should go */
1708   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1709   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1710   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1711   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1712   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1713   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1714   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1715 
1716   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1717   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1718   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1719 
1720   /* Find out where my gcols should go */
1721   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1722   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1723   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1724   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1725   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1726   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1727   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1728   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1729 
1730   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1731   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1732   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1733   for (i=0; i<m; i++) {
1734     PetscInt    row = rdest[i];
1735     PetscMPIInt rowner;
1736     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1737     for (j=ai[i]; j<ai[i+1]; j++) {
1738       PetscInt    col = cdest[aj[j]];
1739       PetscMPIInt cowner;
1740       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1741       if (rowner == cowner) dnnz[i]++;
1742       else onnz[i]++;
1743     }
1744     for (j=bi[i]; j<bi[i+1]; j++) {
1745       PetscInt    col = gcdest[bj[j]];
1746       PetscMPIInt cowner;
1747       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1748       if (rowner == cowner) dnnz[i]++;
1749       else onnz[i]++;
1750     }
1751   }
1752   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1753   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1754   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1755   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1756   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1757 
1758   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1759   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1760   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1761   for (i=0; i<m; i++) {
1762     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1763     PetscInt j0,rowlen;
1764     rowlen = ai[i+1] - ai[i];
1765     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert in batches */
1766       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1767       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1768     }
1769     rowlen = bi[i+1] - bi[i];
1770     for (j0=j=0; j<rowlen; j0=j) {
1771       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1772       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1773     }
1774   }
1775   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1776   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1777   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1778   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1779   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1780   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1781   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1782   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1783   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1784   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1785   *B = Aperm;
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1790 {
1791   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1792   PetscErrorCode ierr;
1793 
1794   PetscFunctionBegin;
1795   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1796   if (ghosts) *ghosts = aij->garray;
1797   PetscFunctionReturn(0);
1798 }
1799 
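/*
   MatGetInfo_MPIAIJ - sums the MatInfo counters of the diagonal and off-diagonal blocks;
   for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the five counters are combined across all
   processes with an MPIU_Allreduce().
*/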
1800 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1801 {
1802   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1803   Mat            A    = mat->A,B = mat->B;
1804   PetscErrorCode ierr;
1805   PetscLogDouble isend[5],irecv[5];
1806 
1807   PetscFunctionBegin;
1808   info->block_size = 1.0;
1809   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1810 
1811   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1812   isend[3] = info->memory;  isend[4] = info->mallocs;
1813 
1814   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1815 
1816   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1817   isend[3] += info->memory;  isend[4] += info->mallocs;
1818   if (flag == MAT_LOCAL) {
1819     info->nz_used      = isend[0];
1820     info->nz_allocated = isend[1];
1821     info->nz_unneeded  = isend[2];
1822     info->memory       = isend[3];
1823     info->mallocs      = isend[4];
1824   } else if (flag == MAT_GLOBAL_MAX) {
1825     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1826 
1827     info->nz_used      = irecv[0];
1828     info->nz_allocated = irecv[1];
1829     info->nz_unneeded  = irecv[2];
1830     info->memory       = irecv[3];
1831     info->mallocs      = irecv[4];
1832   } else if (flag == MAT_GLOBAL_SUM) {
1833     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1834 
1835     info->nz_used      = irecv[0];
1836     info->nz_allocated = irecv[1];
1837     info->nz_unneeded  = irecv[2];
1838     info->memory       = irecv[3];
1839     info->mallocs      = irecv[4];
1840   }
1841   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1842   info->fill_ratio_needed = 0;
1843   info->factor_mallocs    = 0;
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1848 {
1849   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1850   PetscErrorCode ierr;
1851 
1852   PetscFunctionBegin;
1853   switch (op) {
1854   case MAT_NEW_NONZERO_LOCATIONS:
1855   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1856   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1857   case MAT_KEEP_NONZERO_PATTERN:
1858   case MAT_NEW_NONZERO_LOCATION_ERR:
1859   case MAT_USE_INODES:
1860   case MAT_IGNORE_ZERO_ENTRIES:
1861     MatCheckPreallocated(A,1);
1862     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1863     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1864     break;
1865   case MAT_ROW_ORIENTED:
1866     MatCheckPreallocated(A,1);
1867     a->roworiented = flg;
1868 
1869     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1870     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1871     break;
1872   case MAT_NEW_DIAGONALS:
1873   case MAT_SORTED_FULL:
1874     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1875     break;
1876   case MAT_IGNORE_OFF_PROC_ENTRIES:
1877     a->donotstash = flg;
1878     break;
1879   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1880   case MAT_SPD:
1881   case MAT_SYMMETRIC:
1882   case MAT_STRUCTURALLY_SYMMETRIC:
1883   case MAT_HERMITIAN:
1884   case MAT_SYMMETRY_ETERNAL:
1885     break;
1886   case MAT_SUBMAT_SINGLEIS:
1887     A->submat_singleis = flg;
1888     break;
1889   case MAT_STRUCTURE_ONLY:
1890     /* The option is handled directly by MatSetOption() */
1891     break;
1892   default:
1893     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
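/*
   MatGetRow_MPIAIJ - returns a locally owned row by merging the corresponding rows of the
   diagonal block A and the off-diagonal block B into a single array of global column
   indices (and values), using the workspace mat->rowvalues/mat->rowindices sized for the
   longest local row.
*/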
1898 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1899 {
1900   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1901   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1902   PetscErrorCode ierr;
1903   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1904   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1905   PetscInt       *cmap,*idx_p;
1906 
1907   PetscFunctionBegin;
1908   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1909   mat->getrowactive = PETSC_TRUE;
1910 
1911   if (!mat->rowvalues && (idx || v)) {
1912     /*
1913         allocate enough space to hold information from the longest row.
1914     */
1915     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1916     PetscInt   max = 1,tmp;
1917     for (i=0; i<matin->rmap->n; i++) {
1918       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1919       if (max < tmp) max = tmp;
1920     }
1921     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1922   }
1923 
1924   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1925   lrow = row - rstart;
1926 
1927   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1928   if (!v)   {pvA = 0; pvB = 0;}
1929   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1930   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1931   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1932   nztot = nzA + nzB;
1933 
1934   cmap = mat->garray;
1935   if (v  || idx) {
1936     if (nztot) {
1937       /* Sort by increasing column numbers, assuming A and B already sorted */
1938       PetscInt imark = -1;
1939       if (v) {
1940         *v = v_p = mat->rowvalues;
1941         for (i=0; i<nzB; i++) {
1942           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1943           else break;
1944         }
1945         imark = i;
1946         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1947         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1948       }
1949       if (idx) {
1950         *idx = idx_p = mat->rowindices;
1951         if (imark > -1) {
1952           for (i=0; i<imark; i++) {
1953             idx_p[i] = cmap[cworkB[i]];
1954           }
1955         } else {
1956           for (i=0; i<nzB; i++) {
1957             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1958             else break;
1959           }
1960           imark = i;
1961         }
1962         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1963         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1964       }
1965     } else {
1966       if (idx) *idx = 0;
1967       if (v)   *v   = 0;
1968     }
1969   }
1970   *nz  = nztot;
1971   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1972   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1977 {
1978   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1979 
1980   PetscFunctionBegin;
1981   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1982   aij->getrowactive = PETSC_FALSE;
1983   PetscFunctionReturn(0);
1984 }
1985 
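/*
   MatNorm_MPIAIJ - NORM_FROBENIUS sums |a_ij|^2 over both local blocks and reduces;
   NORM_1 (the maximum column sum) accumulates the local column sums into an array of
   global length before reducing; NORM_INFINITY (the maximum row sum) reduces the largest
   local row sum with MPI_MAX.  The two norm is not supported.
*/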
1986 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1987 {
1988   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1989   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1990   PetscErrorCode ierr;
1991   PetscInt       i,j,cstart = mat->cmap->rstart;
1992   PetscReal      sum = 0.0;
1993   MatScalar      *v;
1994 
1995   PetscFunctionBegin;
1996   if (aij->size == 1) {
1997     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1998   } else {
1999     if (type == NORM_FROBENIUS) {
2000       v = amat->a;
2001       for (i=0; i<amat->nz; i++) {
2002         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2003       }
2004       v = bmat->a;
2005       for (i=0; i<bmat->nz; i++) {
2006         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2007       }
2008       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2009       *norm = PetscSqrtReal(*norm);
2010       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
2011     } else if (type == NORM_1) { /* max column norm */
2012       PetscReal *tmp,*tmp2;
2013       PetscInt  *jj,*garray = aij->garray;
2014       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
2015       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
2016       *norm = 0.0;
2017       v     = amat->a; jj = amat->j;
2018       for (j=0; j<amat->nz; j++) {
2019         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2020       }
2021       v = bmat->a; jj = bmat->j;
2022       for (j=0; j<bmat->nz; j++) {
2023         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2024       }
2025       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2026       for (j=0; j<mat->cmap->N; j++) {
2027         if (tmp2[j] > *norm) *norm = tmp2[j];
2028       }
2029       ierr = PetscFree(tmp);CHKERRQ(ierr);
2030       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2031       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2032     } else if (type == NORM_INFINITY) { /* max row norm */
2033       PetscReal ntemp = 0.0;
2034       for (j=0; j<aij->A->rmap->n; j++) {
2035         v   = amat->a + amat->i[j];
2036         sum = 0.0;
2037         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2038           sum += PetscAbsScalar(*v); v++;
2039         }
2040         v = bmat->a + bmat->i[j];
2041         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2042           sum += PetscAbsScalar(*v); v++;
2043         }
2044         if (sum > ntemp) ntemp = sum;
2045       }
2046       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2047       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2048     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2049   }
2050   PetscFunctionReturn(0);
2051 }
2052 
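/*
   MatTranspose_MPIAIJ - the diagonal block is transposed locally with MatTranspose()
   (all writes stay on-process), while the rows of the off-diagonal block are converted to
   global column numbering and inserted as columns of the result with MatSetValues().
   Preallocation of the result uses d_nnz/o_nnz counts obtained with a PetscSF reduction.
*/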
2053 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2054 {
2055   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2056   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2057   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2058   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2059   PetscErrorCode  ierr;
2060   Mat             B,A_diag,*B_diag;
2061   const MatScalar *array;
2062 
2063   PetscFunctionBegin;
2064   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2065   ai = Aloc->i; aj = Aloc->j;
2066   bi = Bloc->i; bj = Bloc->j;
2067   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2068     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2069     PetscSFNode          *oloc;
2070     PETSC_UNUSED PetscSF sf;
2071 
2072     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2073     /* compute d_nnz for preallocation */
2074     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2075     for (i=0; i<ai[ma]; i++) {
2076       d_nnz[aj[i]]++;
2077     }
2078     /* compute local off-diagonal contributions */
2079     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2080     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2081     /* map those to global */
2082     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2083     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2084     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2085     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2086     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2087     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2088     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2089 
2090     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2091     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2092     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2093     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2094     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2095     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2096   } else {
2097     B    = *matout;
2098     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2099   }
2100 
2101   b           = (Mat_MPIAIJ*)B->data;
2102   A_diag      = a->A;
2103   B_diag      = &b->A;
2104   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2105   A_diag_ncol = A_diag->cmap->N;
2106   B_diag_ilen = sub_B_diag->ilen;
2107   B_diag_i    = sub_B_diag->i;
2108 
2109   /* Set ilen for diagonal of B */
2110   for (i=0; i<A_diag_ncol; i++) {
2111     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2112   }
2113 
2114   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2115      very quickly (i.e. without using MatSetValues()), because all writes are local. */
2116   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2117 
2118   /* copy over the B part */
2119   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2120   array = Bloc->a;
2121   row   = A->rmap->rstart;
2122   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2123   cols_tmp = cols;
2124   for (i=0; i<mb; i++) {
2125     ncol = bi[i+1]-bi[i];
2126     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2127     row++;
2128     array += ncol; cols_tmp += ncol;
2129   }
2130   ierr = PetscFree(cols);CHKERRQ(ierr);
2131 
2132   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2133   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2134   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2135     *matout = B;
2136   } else {
2137     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2138   }
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2143 {
2144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2145   Mat            a    = aij->A,b = aij->B;
2146   PetscErrorCode ierr;
2147   PetscInt       s1,s2,s3;
2148 
2149   PetscFunctionBegin;
2150   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2151   if (rr) {
2152     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2153     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2154     /* Overlap communication with computation. */
2155     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2156   }
2157   if (ll) {
2158     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2159     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2160     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2161   }
2162   /* scale the diagonal block */
2163   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2164 
2165   if (rr) {
2166     /* Do a scatter end and then right scale the off-diagonal block */
2167     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2168     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2169   }
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2174 {
2175   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2176   PetscErrorCode ierr;
2177 
2178   PetscFunctionBegin;
2179   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2184 {
2185   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2186   Mat            a,b,c,d;
2187   PetscBool      flg;
2188   PetscErrorCode ierr;
2189 
2190   PetscFunctionBegin;
2191   a = matA->A; b = matA->B;
2192   c = matB->A; d = matB->B;
2193 
2194   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2195   if (flg) {
2196     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2197   }
2198   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2203 {
2204   PetscErrorCode ierr;
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2207 
2208   PetscFunctionBegin;
2209   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2210   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2211     /* because of the column compression in the off-processor part of the matrix a->B,
2212        the number of columns in a->B and b->B may be different, hence we cannot call
2213        MatCopy() directly on the two parts. If need be, a copy more efficient than
2214        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2215        then copying the submatrices */
2216     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2217   } else {
2218     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2219     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2220   }
2221   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2226 {
2227   PetscErrorCode ierr;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 /*
2235    Computes the number of nonzeros per row needed for preallocation when X and Y
2236    have different nonzero structure.
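   For example (illustrative values), if row i of X has global columns {1,4,7} and row i of
   Y has global columns {2,4,9}, the merged row has the 5 distinct columns {1,2,4,7,9}, so
   nnz[i] = 5.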
2237 */
2238 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2239 {
2240   PetscInt       i,j,k,nzx,nzy;
2241 
2242   PetscFunctionBegin;
2243   /* Set the number of nonzeros in the new matrix */
2244   for (i=0; i<m; i++) {
2245     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2246     nzx = xi[i+1] - xi[i];
2247     nzy = yi[i+1] - yi[i];
2248     nnz[i] = 0;
2249     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2250       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2251       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2252       nnz[i]++;
2253     }
2254     for (; k<nzy; k++) nnz[i]++;
2255   }
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2260 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2261 {
2262   PetscErrorCode ierr;
2263   PetscInt       m = Y->rmap->N;
2264   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2265   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
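/*
   MatAXPY_MPIAIJ - with SAME_NONZERO_PATTERN the update reduces to two BLAS axpy calls on
   the value arrays of the diagonal and off-diagonal blocks; with SUBSET_NONZERO_PATTERN the
   generic MatAXPY_Basic() is used; otherwise a new matrix with the merged nonzero pattern
   is preallocated and Y is replaced by it.
*/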
2272 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2273 {
2274   PetscErrorCode ierr;
2275   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2276   PetscBLASInt   bnz,one=1;
2277   Mat_SeqAIJ     *x,*y;
2278 
2279   PetscFunctionBegin;
2280   if (str == SAME_NONZERO_PATTERN) {
2281     PetscScalar alpha = a;
2282     x    = (Mat_SeqAIJ*)xx->A->data;
2283     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2284     y    = (Mat_SeqAIJ*)yy->A->data;
2285     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2286     x    = (Mat_SeqAIJ*)xx->B->data;
2287     y    = (Mat_SeqAIJ*)yy->B->data;
2288     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2289     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2290     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2291     /* the MatAXPY_Basic* subroutines call MatAssembly, so the matrix on the GPU
2292        will be updated */
2293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2294     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2295       Y->offloadmask = PETSC_OFFLOAD_CPU;
2296     }
2297 #endif
2298   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2299     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2300   } else {
2301     Mat      B;
2302     PetscInt *nnz_d,*nnz_o;
2303     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2304     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2305     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2306     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2307     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2308     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2309     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2310     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2311     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2312     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2313     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2314     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2315     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2316     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2317   }
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2322 
2323 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2324 {
2325 #if defined(PETSC_USE_COMPLEX)
2326   PetscErrorCode ierr;
2327   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2328 
2329   PetscFunctionBegin;
2330   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2331   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2332 #else
2333   PetscFunctionBegin;
2334 #endif
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2339 {
2340   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2341   PetscErrorCode ierr;
2342 
2343   PetscFunctionBegin;
2344   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2345   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2350 {
2351   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2352   PetscErrorCode ierr;
2353 
2354   PetscFunctionBegin;
2355   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2356   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2357   PetscFunctionReturn(0);
2358 }
2359 
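/*
   MatGetRowMaxAbs_MPIAIJ - takes the row maxima of the diagonal and off-diagonal blocks
   separately and keeps, for each local row, the entry of larger absolute value, converting
   block-local column indices to global ones (cstart offset for the diagonal block, garray
   for the off-diagonal block).
*/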
2360 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2361 {
2362   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2363   PetscErrorCode ierr;
2364   PetscInt       i,*idxb = 0;
2365   PetscScalar    *va,*vb;
2366   Vec            vtmp;
2367 
2368   PetscFunctionBegin;
2369   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2370   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2371   if (idx) {
2372     for (i=0; i<A->rmap->n; i++) {
2373       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2374     }
2375   }
2376 
2377   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2378   if (idx) {
2379     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2380   }
2381   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2382   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2383 
2384   for (i=0; i<A->rmap->n; i++) {
2385     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2386       va[i] = vb[i];
2387       if (idx) idx[i] = a->garray[idxb[i]];
2388     }
2389   }
2390 
2391   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2392   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2393   ierr = PetscFree(idxb);CHKERRQ(ierr);
2394   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2399 {
2400   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2401   PetscErrorCode ierr;
2402   PetscInt       i,*idxb = 0;
2403   PetscScalar    *va,*vb;
2404   Vec            vtmp;
2405 
2406   PetscFunctionBegin;
2407   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2408   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2409   if (idx) {
2410     for (i=0; i<A->rmap->n; i++) {
2411       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2412     }
2413   }
2414 
2415   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2416   if (idx) {
2417     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2418   }
2419   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2420   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2421 
2422   for (i=0; i<A->rmap->n; i++) {
2423     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2424       va[i] = vb[i];
2425       if (idx) idx[i] = a->garray[idxb[i]];
2426     }
2427   }
2428 
2429   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2430   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2431   ierr = PetscFree(idxb);CHKERRQ(ierr);
2432   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2437 {
2438   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2439   PetscInt       n      = A->rmap->n;
2440   PetscInt       cstart = A->cmap->rstart;
2441   PetscInt       *cmap  = mat->garray;
2442   PetscInt       *diagIdx, *offdiagIdx;
2443   Vec            diagV, offdiagV;
2444   PetscScalar    *a, *diagA, *offdiagA;
2445   PetscInt       r;
2446   PetscErrorCode ierr;
2447 
2448   PetscFunctionBegin;
2449   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2450   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2451   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2452   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2453   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2454   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2455   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2456   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2457   for (r = 0; r < n; ++r) {
2458     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2459       a[r]   = diagA[r];
2460       idx[r] = cstart + diagIdx[r];
2461     } else {
2462       a[r]   = offdiagA[r];
2463       idx[r] = cmap[offdiagIdx[r]];
2464     }
2465   }
2466   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2467   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2468   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2469   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2470   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2471   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2476 {
2477   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2478   PetscInt       n      = A->rmap->n;
2479   PetscInt       cstart = A->cmap->rstart;
2480   PetscInt       *cmap  = mat->garray;
2481   PetscInt       *diagIdx, *offdiagIdx;
2482   Vec            diagV, offdiagV;
2483   PetscScalar    *a, *diagA, *offdiagA;
2484   PetscInt       r;
2485   PetscErrorCode ierr;
2486 
2487   PetscFunctionBegin;
2488   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2489   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2490   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2491   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2492   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2493   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2494   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2495   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2496   for (r = 0; r < n; ++r) {
2497     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2498       a[r]   = diagA[r];
2499       idx[r] = cstart + diagIdx[r];
2500     } else {
2501       a[r]   = offdiagA[r];
2502       idx[r] = cmap[offdiagIdx[r]];
2503     }
2504   }
2505   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2509   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2510   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2515 {
2516   PetscErrorCode ierr;
2517   Mat            *dummy;
2518 
2519   PetscFunctionBegin;
2520   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2521   *newmat = *dummy;
2522   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2527 {
2528   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2529   PetscErrorCode ierr;
2530 
2531   PetscFunctionBegin;
2532   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2533   A->factorerrortype = a->A->factorerrortype;
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2538 {
2539   PetscErrorCode ierr;
2540   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2541 
2542   PetscFunctionBegin;
2543   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2544   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2545   if (x->assembled) {
2546     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2547   } else {
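         /* before the first assembly B still uses global column numbering: fill it with random entries
            while skipping the locally owned column range, whose entries belong in the diagonal block A */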
2548     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2549   }
2550   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2551   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2556 {
2557   PetscFunctionBegin;
2558   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2559   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 /*@
2564    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap
2565 
2566    Collective on Mat
2567 
2568    Input Parameters:
2569 +    A - the matrix
2570 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is the non-scalable algorithm)
2571 
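        Options Database Keys:
     .  -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap during MatIncreaseOverlap()

        Example Usage:
        A minimal sketch (assuming A is an assembled MATMPIAIJ matrix and nis, is, and ov are the
        arguments one would pass to MatIncreaseOverlap()):
     .vb
        ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
        ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);  /* overlap is now computed with the scalable algorithm */
     .ve
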
2572    Level: advanced
2573 
2574 @*/
2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2576 {
2577   PetscErrorCode       ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2585 {
2586   PetscErrorCode       ierr;
2587   PetscBool            sc = PETSC_FALSE,flg;
2588 
2589   PetscFunctionBegin;
2590   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2591   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2592   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2593   if (flg) {
2594     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2595   }
2596   ierr = PetscOptionsTail();CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2601 {
2602   PetscErrorCode ierr;
2603   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2604   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2605 
2606   PetscFunctionBegin;
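       /* ensure the diagonal block has room for the new diagonal entries: give an unpreallocated matrix a
          minimal preallocation, or re-preallocate an empty diagonal block while preserving its nonew flag */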
2607   if (!Y->preallocated) {
2608     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2609   } else if (!aij->nz) {
2610     PetscInt nonew = aij->nonew;
2611     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2612     aij->nonew = nonew;
2613   }
2614   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2619 {
2620   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2621   PetscErrorCode ierr;
2622 
2623   PetscFunctionBegin;
2624   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2625   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2626   if (d) {
2627     PetscInt rstart;
2628     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
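         /* MatMissingDiagonal() on the diagonal block returns a local row index; shift it into global numbering */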
2629     *d += rstart;
2630 
2631   }
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2636 {
2637   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2638   PetscErrorCode ierr;
2639 
2640   PetscFunctionBegin;
2641   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 /* -------------------------------------------------------------------*/
2646 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2647                                        MatGetRow_MPIAIJ,
2648                                        MatRestoreRow_MPIAIJ,
2649                                        MatMult_MPIAIJ,
2650                                 /* 4*/ MatMultAdd_MPIAIJ,
2651                                        MatMultTranspose_MPIAIJ,
2652                                        MatMultTransposeAdd_MPIAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                 /*10*/ 0,
2657                                        0,
2658                                        0,
2659                                        MatSOR_MPIAIJ,
2660                                        MatTranspose_MPIAIJ,
2661                                 /*15*/ MatGetInfo_MPIAIJ,
2662                                        MatEqual_MPIAIJ,
2663                                        MatGetDiagonal_MPIAIJ,
2664                                        MatDiagonalScale_MPIAIJ,
2665                                        MatNorm_MPIAIJ,
2666                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2667                                        MatAssemblyEnd_MPIAIJ,
2668                                        MatSetOption_MPIAIJ,
2669                                        MatZeroEntries_MPIAIJ,
2670                                 /*24*/ MatZeroRows_MPIAIJ,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                 /*29*/ MatSetUp_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                        MatGetDiagonalBlock_MPIAIJ,
2679                                        0,
2680                                 /*34*/ MatDuplicate_MPIAIJ,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                 /*39*/ MatAXPY_MPIAIJ,
2686                                        MatCreateSubMatrices_MPIAIJ,
2687                                        MatIncreaseOverlap_MPIAIJ,
2688                                        MatGetValues_MPIAIJ,
2689                                        MatCopy_MPIAIJ,
2690                                 /*44*/ MatGetRowMax_MPIAIJ,
2691                                        MatScale_MPIAIJ,
2692                                        MatShift_MPIAIJ,
2693                                        MatDiagonalSet_MPIAIJ,
2694                                        MatZeroRowsColumns_MPIAIJ,
2695                                 /*49*/ MatSetRandom_MPIAIJ,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2701                                        0,
2702                                        MatSetUnfactored_MPIAIJ,
2703                                        MatPermute_MPIAIJ,
2704                                        0,
2705                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2706                                        MatDestroy_MPIAIJ,
2707                                        MatView_MPIAIJ,
2708                                        0,
2709                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2710                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2711                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2716                                        MatGetRowMinAbs_MPIAIJ,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                 /*75*/ MatFDColoringApply_AIJ,
2722                                        MatSetFromOptions_MPIAIJ,
2723                                        0,
2724                                        0,
2725                                        MatFindZeroDiagonals_MPIAIJ,
2726                                 /*80*/ 0,
2727                                        0,
2728                                        0,
2729                                 /*83*/ MatLoad_MPIAIJ,
2730                                        MatIsSymmetric_MPIAIJ,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2736                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2737                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2738                                        MatPtAP_MPIAIJ_MPIAIJ,
2739                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2740                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2741                                        0,
2742                                        0,
2743                                        0,
2744                                        MatBindToCPU_MPIAIJ,
2745                                 /*99*/ 0,
2746                                        0,
2747                                        0,
2748                                        MatConjugate_MPIAIJ,
2749                                        0,
2750                                 /*104*/MatSetValuesRow_MPIAIJ,
2751                                        MatRealPart_MPIAIJ,
2752                                        MatImaginaryPart_MPIAIJ,
2753                                        0,
2754                                        0,
2755                                 /*109*/0,
2756                                        0,
2757                                        MatGetRowMin_MPIAIJ,
2758                                        0,
2759                                        MatMissingDiagonal_MPIAIJ,
2760                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2761                                        0,
2762                                        MatGetGhosts_MPIAIJ,
2763                                        0,
2764                                        0,
2765                                 /*119*/0,
2766                                        0,
2767                                        0,
2768                                        0,
2769                                        MatGetMultiProcBlock_MPIAIJ,
2770                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2771                                        MatGetColumnNorms_MPIAIJ,
2772                                        MatInvertBlockDiagonal_MPIAIJ,
2773                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2774                                        MatCreateSubMatricesMPI_MPIAIJ,
2775                                 /*129*/0,
2776                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2777                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2778                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2779                                        0,
2780                                 /*134*/0,
2781                                        0,
2782                                        MatRARt_MPIAIJ_MPIAIJ,
2783                                        0,
2784                                        0,
2785                                 /*139*/MatSetBlockSizes_MPIAIJ,
2786                                        0,
2787                                        0,
2788                                        MatFDColoringSetUp_MPIXAIJ,
2789                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2790                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2791 };
2792 
2793 /* ----------------------------------------------------------------------------------------*/
2794 
2795 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2796 {
2797   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2798   PetscErrorCode ierr;
2799 
2800   PetscFunctionBegin;
2801   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2802   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2807 {
2808   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2809   PetscErrorCode ierr;
2810 
2811   PetscFunctionBegin;
2812   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2813   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2818 {
2819   Mat_MPIAIJ     *b;
2820   PetscErrorCode ierr;
2821   PetscMPIInt    size;
2822 
2823   PetscFunctionBegin;
2824   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2825   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2826   b = (Mat_MPIAIJ*)B->data;
2827 
2828 #if defined(PETSC_USE_CTABLE)
2829   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2830 #else
2831   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2832 #endif
2833   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2834   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2835   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2836 
2837   /* Because B may have been resized by a previous assembly, we simply destroy it and create a new one each time */
2838   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2839   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2840   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
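       /* until assembly compacts its columns, the off-process block is created with the full global column
          width (and with zero columns in the single-process case, where there is no off-process part) */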
2841   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2842   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2843   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2844   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2845 
2846   if (!B->preallocated) {
2847     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2848     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2849     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2850     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2851     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2852   }
2853 
2854   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2855   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2856   B->preallocated  = PETSC_TRUE;
2857   B->was_assembled = PETSC_FALSE;
2858   B->assembled     = PETSC_FALSE;
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2863 {
2864   Mat_MPIAIJ     *b;
2865   PetscErrorCode ierr;
2866 
2867   PetscFunctionBegin;
2868   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2869   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2870   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2871   b = (Mat_MPIAIJ*)B->data;
2872 
2873 #if defined(PETSC_USE_CTABLE)
2874   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2875 #else
2876   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2877 #endif
2878   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2879   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2880   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2881 
2882   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2883   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2884   B->preallocated  = PETSC_TRUE;
2885   B->was_assembled = PETSC_FALSE;
2886   B->assembled = PETSC_FALSE;
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2891 {
2892   Mat            mat;
2893   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2894   PetscErrorCode ierr;
2895 
2896   PetscFunctionBegin;
2897   *newmat = 0;
2898   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2899   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2900   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2901   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2902   a       = (Mat_MPIAIJ*)mat->data;
2903 
2904   mat->factortype   = matin->factortype;
2905   mat->assembled    = PETSC_TRUE;
2906   mat->insertmode   = NOT_SET_VALUES;
2907   mat->preallocated = PETSC_TRUE;
2908 
2909   a->size         = oldmat->size;
2910   a->rank         = oldmat->rank;
2911   a->donotstash   = oldmat->donotstash;
2912   a->roworiented  = oldmat->roworiented;
2913   a->rowindices   = 0;
2914   a->rowvalues    = 0;
2915   a->getrowactive = PETSC_FALSE;
2916 
2917   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2918   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2919 
2920   if (oldmat->colmap) {
2921 #if defined(PETSC_USE_CTABLE)
2922     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2923 #else
2924     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2925     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2926     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2927 #endif
2928   } else a->colmap = 0;
2929   if (oldmat->garray) {
2930     PetscInt len;
2931     len  = oldmat->B->cmap->n;
2932     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2933     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2934     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2935   } else a->garray = 0;
2936 
2937   /* MatDuplicate may be called on a non-assembled matrix, since it only requires
2938      the matrix to be preallocated; this can happen, for instance, inside a
2939      DMCreateMatrix_Shell */
2940   if (oldmat->lvec) {
2941     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2942     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2943   }
2944   if (oldmat->Mvctx) {
2945     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2946     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2947   }
2948   if (oldmat->Mvctx_mpi1) {
2949     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2950     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2951   }
2952 
2953   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2954   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2955   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2956   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2957   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2958   *newmat = mat;
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2963 {
2964   PetscBool      isbinary, ishdf5;
2965   PetscErrorCode ierr;
2966 
2967   PetscFunctionBegin;
2968   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2969   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2970   /* force binary viewer to load .info file if it has not yet done so */
2971   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2972   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2973   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2974   if (isbinary) {
2975     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2976   } else if (ishdf5) {
2977 #if defined(PETSC_HAVE_HDF5)
2978     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2979 #else
2980     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2981 #endif
2982   } else {
2983     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2984   }
2985   PetscFunctionReturn(0);
2986 }
2987 
2988 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2989 {
2990   PetscScalar    *vals,*svals;
2991   MPI_Comm       comm;
2992   PetscErrorCode ierr;
2993   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2994   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2995   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2996   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2997   PetscInt       cend,cstart,n,*rowners;
2998   int            fd;
2999   PetscInt       bs = newMat->rmap->bs;
3000 
3001   PetscFunctionBegin;
3002   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3003   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3004   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3005   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3006   if (!rank) {
3007     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3008     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3009     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
3010   }
3011 
3012   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
3013   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3014   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3015   if (bs < 0) bs = 1;
3016 
3017   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3018   M    = header[1]; N = header[2];
3019 
3020   /* If global sizes are set, check if they are consistent with that given in the file */
3021   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
3022   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
3023 
3024   /* determine ownership of all (block) rows */
3025   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
3026   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3027   else m = newMat->rmap->n; /* Set by user */
3028 
3029   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
3030   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3031 
3032   /* First process needs enough room for process with most rows */
3033   if (!rank) {
3034     mmax = rowners[1];
3035     for (i=2; i<=size; i++) {
3036       mmax = PetscMax(mmax, rowners[i]);
3037     }
3038   } else mmax = -1;             /* unused, but compilers complain */
3039 
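       /* turn the gathered per-process row counts into a prefix sum so that rowners[p] is the first global row owned by process p */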
3040   rowners[0] = 0;
3041   for (i=2; i<=size; i++) {
3042     rowners[i] += rowners[i-1];
3043   }
3044   rstart = rowners[rank];
3045   rend   = rowners[rank+1];
3046 
3047   /* distribute row lengths to all processors */
3048   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3049   if (!rank) {
3050     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3051     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3052     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3053     for (j=0; j<m; j++) {
3054       procsnz[0] += ourlens[j];
3055     }
3056     for (i=1; i<size; i++) {
3057       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3058       /* calculate the number of nonzeros on each processor */
3059       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3060         procsnz[i] += rowlengths[j];
3061       }
3062       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3063     }
3064     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3065   } else {
3066     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3067   }
3068 
3069   if (!rank) {
3070     /* determine max buffer needed and allocate it */
3071     maxnz = 0;
3072     for (i=0; i<size; i++) {
3073       maxnz = PetscMax(maxnz,procsnz[i]);
3074     }
3075     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3076 
3077     /* read in my part of the matrix column indices  */
3078     nz   = procsnz[0];
3079     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3080     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3081 
3082     /* read in everyone else's and ship off */
3083     for (i=1; i<size; i++) {
3084       nz   = procsnz[i];
3085       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3086       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3087     }
3088     ierr = PetscFree(cols);CHKERRQ(ierr);
3089   } else {
3090     /* determine buffer space needed for message */
3091     nz = 0;
3092     for (i=0; i<m; i++) {
3093       nz += ourlens[i];
3094     }
3095     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3096 
3097     /* receive message of column indices */
3098     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3099   }
3100 
3101   /* determine column ownership if matrix is not square */
3102   if (N != M) {
3103     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3104     else n = newMat->cmap->n;
3105     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3106     cstart = cend - n;
3107   } else {
3108     cstart = rstart;
3109     cend   = rend;
3110     n      = cend - cstart;
3111   }
3112 
3113   /* loop over local rows, determining the number of off-diagonal entries */
3114   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3115   jj   = 0;
3116   for (i=0; i<m; i++) {
3117     for (j=0; j<ourlens[i]; j++) {
3118       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3119       jj++;
3120     }
3121   }
3122 
3123   for (i=0; i<m; i++) {
3124     ourlens[i] -= offlens[i];
3125   }
3126   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3127 
3128   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3129 
3130   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3131 
3132   for (i=0; i<m; i++) {
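       /* restore the full row lengths (diagonal + off-diagonal) so they can serve as per-row column counts when the values are inserted below */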
3133     ourlens[i] += offlens[i];
3134   }
3135 
3136   if (!rank) {
3137     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3138 
3139     /* read in my part of the matrix numerical values  */
3140     nz   = procsnz[0];
3141     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3142 
3143     /* insert into matrix */
3144     jj      = rstart;
3145     smycols = mycols;
3146     svals   = vals;
3147     for (i=0; i<m; i++) {
3148       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3149       smycols += ourlens[i];
3150       svals   += ourlens[i];
3151       jj++;
3152     }
3153 
3154     /* read in other processors and ship out */
3155     for (i=1; i<size; i++) {
3156       nz   = procsnz[i];
3157       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3158       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3159     }
3160     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3161   } else {
3162     /* receive numeric values */
3163     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3164 
3165     /* receive message of values */
3166     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3167 
3168     /* insert into matrix */
3169     jj      = rstart;
3170     smycols = mycols;
3171     svals   = vals;
3172     for (i=0; i<m; i++) {
3173       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3174       smycols += ourlens[i];
3175       svals   += ourlens[i];
3176       jj++;
3177     }
3178   }
3179   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3180   ierr = PetscFree(vals);CHKERRQ(ierr);
3181   ierr = PetscFree(mycols);CHKERRQ(ierr);
3182   ierr = PetscFree(rowners);CHKERRQ(ierr);
3183   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3184   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3185   PetscFunctionReturn(0);
3186 }
3187 
3188 /* Not scalable because of ISAllGather() unless getting all columns. */
3189 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3190 {
3191   PetscErrorCode ierr;
3192   IS             iscol_local;
3193   PetscBool      isstride;
3194   PetscMPIInt    lisstride=0,gisstride;
3195 
3196   PetscFunctionBegin;
3197   /* check if we are grabbing all columns */
3198   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3199 
3200   if (isstride) {
3201     PetscInt  start,len,mstart,mlen;
3202     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3203     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3204     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3205     if (mstart == start && mlen-mstart == len) lisstride = 1;
3206   }
3207 
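       /* the ISAllGather() below can be skipped only if every process requests its entire local column range, hence the MPI_MIN reduction */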
3208   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3209   if (gisstride) {
3210     PetscInt N;
3211     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3212     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3213     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3214     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3215   } else {
3216     PetscInt cbs;
3217     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3218     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3219     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3220   }
3221 
3222   *isseq = iscol_local;
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 /*
3227  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3228  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3229 
3230  Input Parameters:
3231    mat - matrix
3232    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3233            i.e., mat->rstart <= isrow[i] < mat->rend
3234    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3235            i.e., mat->cstart <= iscol[i] < mat->cend
3236  Output Parameters:
3237    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3238    iscol_o - sequential column index set for retrieving mat->B
3239    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3240  */
3241 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3242 {
3243   PetscErrorCode ierr;
3244   Vec            x,cmap;
3245   const PetscInt *is_idx;
3246   PetscScalar    *xarray,*cmaparray;
3247   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3248   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3249   Mat            B=a->B;
3250   Vec            lvec=a->lvec,lcmap;
3251   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3252   MPI_Comm       comm;
3253   VecScatter     Mvctx=a->Mvctx;
3254 
3255   PetscFunctionBegin;
3256   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3257   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3258 
3259   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3260   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3261   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3262   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3263   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3264 
3265   /* Get start indices */
3266   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3267   isstart -= ncols;
3268   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3269 
3270   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3271   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3272   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3273   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3274   for (i=0; i<ncols; i++) {
3275     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3276     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3277     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3278   }
3279   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3280   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3281   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3282 
3283   /* Get iscol_d */
3284   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3285   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3286   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3287 
3288   /* Get isrow_d */
3289   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3290   rstart = mat->rmap->rstart;
3291   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3292   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3293   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3294   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3295 
3296   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3297   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3298   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3299 
3300   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3301   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3302   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3303 
3304   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3305 
3306   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3307   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3308 
3309   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3310   /* off-process column indices */
3311   count = 0;
3312   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3313   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3314 
3315   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3316   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3317   for (i=0; i<Bn; i++) {
3318     if (PetscRealPart(xarray[i]) > -1.0) {
3319       idx[count]     = i;                   /* local column index in off-diagonal part B */
3320       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3321       count++;
3322     }
3323   }
3324   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3325   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3326 
3327   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3328   /* cannot ensure iscol_o has same blocksize as iscol! */
3329 
3330   ierr = PetscFree(idx);CHKERRQ(ierr);
3331   *garray = cmap1;
3332 
3333   ierr = VecDestroy(&x);CHKERRQ(ierr);
3334   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3335   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3336   PetscFunctionReturn(0);
3337 }
3338 
3339 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3340 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3341 {
3342   PetscErrorCode ierr;
3343   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3344   Mat            M = NULL;
3345   MPI_Comm       comm;
3346   IS             iscol_d,isrow_d,iscol_o;
3347   Mat            Asub = NULL,Bsub = NULL;
3348   PetscInt       n;
3349 
3350   PetscFunctionBegin;
3351   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3352 
3353   if (call == MAT_REUSE_MATRIX) {
3354     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3355     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3356     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3357 
3358     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3359     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3360 
3361     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3362     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3363 
3364     /* Update diagonal and off-diagonal portions of submat */
3365     asub = (Mat_MPIAIJ*)(*submat)->data;
3366     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3367     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3368     if (n) {
3369       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3370     }
3371     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3372     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3373 
3374   } else { /* call == MAT_INITIAL_MATRIX) */
3375     const PetscInt *garray;
3376     PetscInt        BsubN;
3377 
3378     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3379     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3380 
3381     /* Create local submatrices Asub and Bsub */
3382     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3383     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3384 
3385     /* Create submatrix M */
3386     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3387 
3388     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3389     asub = (Mat_MPIAIJ*)M->data;
3390 
3391     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3392     n = asub->B->cmap->N;
3393     if (BsubN > n) {
3394       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3395       const PetscInt *idx;
3396       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3397       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3398 
3399       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3400       j = 0;
3401       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3402       for (i=0; i<n; i++) {
3403         if (j >= BsubN) break;
3404         while (subgarray[i] > garray[j]) j++;
3405 
3406         if (subgarray[i] == garray[j]) {
3407           idx_new[i] = idx[j++];
3408         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3409       }
3410       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3411 
3412       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3413       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3414 
3415     } else if (BsubN < n) {
3416       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N);
3417     }
3418 
3419     ierr = PetscFree(garray);CHKERRQ(ierr);
3420     *submat = M;
3421 
3422     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3423     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3424     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3425 
3426     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3427     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3428 
3429     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3430     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3431   }
3432   PetscFunctionReturn(0);
3433 }
3434 
3435 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3436 {
3437   PetscErrorCode ierr;
3438   IS             iscol_local=NULL,isrow_d;
3439   PetscInt       csize;
3440   PetscInt       n,i,j,start,end;
3441   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3442   MPI_Comm       comm;
3443 
3444   PetscFunctionBegin;
3445   /* If isrow has same processor distribution as mat,
3446      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3447   if (call == MAT_REUSE_MATRIX) {
3448     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3449     if (isrow_d) {
3450       sameRowDist  = PETSC_TRUE;
3451       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3452     } else {
3453       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3454       if (iscol_local) {
3455         sameRowDist  = PETSC_TRUE;
3456         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3457       }
3458     }
3459   } else {
3460     /* Check if isrow has same processor distribution as mat */
3461     sameDist[0] = PETSC_FALSE;
3462     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3463     if (!n) {
3464       sameDist[0] = PETSC_TRUE;
3465     } else {
3466       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3467       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3468       if (i >= start && j < end) {
3469         sameDist[0] = PETSC_TRUE;
3470       }
3471     }
3472 
3473     /* Check if iscol has same processor distribution as mat */
3474     sameDist[1] = PETSC_FALSE;
3475     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3476     if (!n) {
3477       sameDist[1] = PETSC_TRUE;
3478     } else {
3479       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3480       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3481       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3482     }
3483 
3484     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3485     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3486     sameRowDist = tsameDist[0];
3487   }
3488 
3489   if (sameRowDist) {
3490     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3491       /* isrow and iscol have same processor distribution as mat */
3492       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3493       PetscFunctionReturn(0);
3494     } else { /* sameRowDist */
3495       /* isrow has same processor distribution as mat */
3496       if (call == MAT_INITIAL_MATRIX) {
3497         PetscBool sorted;
3498         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3499         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3500         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3501         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3502 
3503         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3504         if (sorted) {
3505           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3506           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3507           PetscFunctionReturn(0);
3508         }
3509       } else { /* call == MAT_REUSE_MATRIX */
3510         IS    iscol_sub;
3511         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3512         if (iscol_sub) {
3513           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3514           PetscFunctionReturn(0);
3515         }
3516       }
3517     }
3518   }
3519 
3520   /* General case: iscol -> iscol_local which has global size of iscol */
3521   if (call == MAT_REUSE_MATRIX) {
3522     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3523     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3524   } else {
3525     if (!iscol_local) {
3526       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3527     }
3528   }
3529 
3530   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3531   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3532 
3533   if (call == MAT_INITIAL_MATRIX) {
3534     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3535     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3536   }
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 /*@C
3541      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3542          and "off-diagonal" part of the matrix in CSR format.
3543 
3544    Collective
3545 
3546    Input Parameters:
3547 +  comm - MPI communicator
3548 .  A - "diagonal" portion of matrix
3549 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3550 -  garray - global indices of the columns of B
3551 
3552    Output Parameter:
3553 .   mat - the matrix, with input A as its local diagonal matrix
3554 
        Level: advanced
3555 
3556    Notes:
3557        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3558        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3559 
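        Example Usage:
        A minimal sketch (Aloc, Bloc, and garray are hypothetical names: Aloc and Bloc are assembled MATSEQAIJ
        matrices holding this process's "diagonal" and "off-diagonal" entries, and garray[] gives the global
        column index of each column of Bloc):
     .vb
        Mat C;
        ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);
        /* Aloc is now owned by C and Bloc has been destroyed; only C may be used from here on */
     .ve
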
3560 .seealso: MatCreateMPIAIJWithSplitArrays()
3561 @*/
3562 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3563 {
3564   PetscErrorCode ierr;
3565   Mat_MPIAIJ     *maij;
3566   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3567   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3568   PetscScalar    *oa=b->a;
3569   Mat            Bnew;
3570   PetscInt       m,n,N;
3571 
3572   PetscFunctionBegin;
3573   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3574   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3575   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3576   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3577   /* the check below is intentionally disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3578   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3579 
3580   /* Get global columns of mat */
3581   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3582 
3583   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3584   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3585   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3586   maij = (Mat_MPIAIJ*)(*mat)->data;
3587 
3588   (*mat)->preallocated = PETSC_TRUE;
3589 
3590   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3591   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3592 
3593   /* Set A as diagonal portion of *mat */
3594   maij->A = A;
3595 
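       /* translate B's local (compacted) column indices into global column indices via garray so that Bnew can be created with global column numbering */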
3596   nz = oi[m];
3597   for (i=0; i<nz; i++) {
3598     col   = oj[i];
3599     oj[i] = garray[col];
3600   }
3601 
3602   /* Set Bnew as off-diagonal portion of *mat */
3603   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3604   bnew        = (Mat_SeqAIJ*)Bnew->data;
3605   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3606   maij->B     = Bnew;
3607 
3608   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3609 
3610   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3611   b->free_a       = PETSC_FALSE;
3612   b->free_ij      = PETSC_FALSE;
3613   ierr = MatDestroy(&B);CHKERRQ(ierr);
3614 
3615   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3616   bnew->free_a       = PETSC_TRUE;
3617   bnew->free_ij      = PETSC_TRUE;
3618 
3619   /* condense columns of maij->B */
3620   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3621   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3622   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3623   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3624   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3625   PetscFunctionReturn(0);
3626 }
3627 
3628 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3629 
3630 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3631 {
3632   PetscErrorCode ierr;
3633   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3634   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3635   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3636   Mat            M,Msub,B=a->B;
3637   MatScalar      *aa;
3638   Mat_SeqAIJ     *aij;
3639   PetscInt       *garray = a->garray,*colsub,Ncols;
3640   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3641   IS             iscol_sub,iscmap;
3642   const PetscInt *is_idx,*cmap;
3643   PetscBool      allcolumns=PETSC_FALSE;
3644   MPI_Comm       comm;
3645 
3646   PetscFunctionBegin;
3647   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3648 
3649   if (call == MAT_REUSE_MATRIX) {
3650     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3651     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3652     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3653 
3654     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3655     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3656 
3657     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3658     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3659 
3660     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3661 
3662   } else { /* call == MAT_INITIAL_MATRIX) */
3663     PetscBool flg;
3664 
3665     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3666     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3667 
3668     /* (1) iscol -> nonscalable iscol_local */
3669     /* Check for special case: each processor gets entire matrix columns */
3670     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3671     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3672     if (allcolumns) {
3673       iscol_sub = iscol_local;
3674       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3675       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3676 
3677     } else {
3678       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3679       PetscInt *idx,*cmap1,k;
3680       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3681       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3682       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3683       count = 0;
3684       k     = 0;
3685       for (i=0; i<Ncols; i++) {
3686         j = is_idx[i];
3687         if (j >= cstart && j < cend) {
3688           /* diagonal part of mat */
3689           idx[count]     = j;
3690           cmap1[count++] = i; /* column index in submat */
3691         } else if (Bn) {
3692           /* off-diagonal part of mat */
3693           if (j == garray[k]) {
3694             idx[count]     = j;
3695             cmap1[count++] = i;  /* column index in submat */
3696           } else if (j > garray[k]) {
3697             while (j > garray[k] && k < Bn-1) k++;
3698             if (j == garray[k]) {
3699               idx[count]     = j;
3700               cmap1[count++] = i; /* column index in submat */
3701             }
3702           }
3703         }
3704       }
3705       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3706 
3707       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3708       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3709       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3710 
3711       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3712     }
3713 
3714     /* (3) Create sequential Msub */
3715     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3716   }
3717 
3718   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3719   aij  = (Mat_SeqAIJ*)(Msub)->data;
3720   ii   = aij->i;
3721   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3722 
3723   /*
3724       m - number of local rows
3725       Ncols - number of columns (same on all processors)
3726       rstart - first row in new global matrix generated
3727   */
3728   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3729 
3730   if (call == MAT_INITIAL_MATRIX) {
3731     /* (4) Create parallel newmat */
3732     PetscMPIInt    rank,size;
3733     PetscInt       csize;
3734 
3735     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3736     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3737 
3738     /*
3739         Determine the number of non-zeros in the diagonal and off-diagonal
3740         portions of the matrix in order to do correct preallocation
3741     */
3742 
3743     /* first get start and end of "diagonal" columns */
3744     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3745     if (csize == PETSC_DECIDE) {
3746       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3747       if (mglobal == Ncols) { /* square matrix */
3748         nlocal = m;
3749       } else {
3750         nlocal = Ncols/size + ((Ncols % size) > rank);
3751       }
3752     } else {
3753       nlocal = csize;
3754     }
3755     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3756     rstart = rend - nlocal;
3757     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3758 
3759     /* next, compute all the lengths */
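         /* cmap[] maps each column of Msub to its global column in the new matrix; entries that fall outside [rstart,rend) are counted as off-diagonal */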
3760     jj    = aij->j;
3761     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3762     olens = dlens + m;
3763     for (i=0; i<m; i++) {
3764       jend = ii[i+1] - ii[i];
3765       olen = 0;
3766       dlen = 0;
3767       for (j=0; j<jend; j++) {
3768         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3769         else dlen++;
3770         jj++;
3771       }
3772       olens[i] = olen;
3773       dlens[i] = dlen;
3774     }
3775 
3776     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3777     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3778 
3779     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3780     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3781     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3782     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3783     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3784     ierr = PetscFree(dlens);CHKERRQ(ierr);
3785 
3786   } else { /* call == MAT_REUSE_MATRIX */
3787     M    = *newmat;
3788     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3789     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3790     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3791     /*
3792          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3793        rather than the slower MatSetValues().
3794     */
3795     M->was_assembled = PETSC_TRUE;
3796     M->assembled     = PETSC_FALSE;
3797   }
3798 
3799   /* (5) Set values of Msub to *newmat */
3800   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3801   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3802 
3803   jj   = aij->j;
3804   aa   = aij->a;
3805   for (i=0; i<m; i++) {
3806     row = rstart + i;
3807     nz  = ii[i+1] - ii[i];
3808     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3809     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3810     jj += nz; aa += nz;
3811   }
3812   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3813 
3814   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3815   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3816 
3817   ierr = PetscFree(colsub);CHKERRQ(ierr);
3818 
3819   /* save Msub, iscol_sub and iscmap used in processor for next request */
3820   if (call ==  MAT_INITIAL_MATRIX) {
3821     *newmat = M;
3822     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3823     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3824 
3825     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3826     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3827 
3828     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3829     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3830 
3831     if (iscol_local) {
3832       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3833       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3834     }
3835   }
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 /*
3840     Not great since it makes two copies of the submatrix: first a SeqAIJ
3841   on each process, and then the final result obtained by concatenating the local matrices.
3842   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3843 
3844   Note: This requires a sequential iscol with all indices.
3845 */
3846 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3847 {
3848   PetscErrorCode ierr;
3849   PetscMPIInt    rank,size;
3850   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3851   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3852   Mat            M,Mreuse;
3853   MatScalar      *aa,*vwork;
3854   MPI_Comm       comm;
3855   Mat_SeqAIJ     *aij;
3856   PetscBool      colflag,allcolumns=PETSC_FALSE;
3857 
3858   PetscFunctionBegin;
3859   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3860   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3861   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3862 
3863   /* Check for special case: each processor gets entire matrix columns */
3864   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3865   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3866   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3867 
3868   if (call ==  MAT_REUSE_MATRIX) {
3869     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3870     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3871     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3872   } else {
3873     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3874   }
3875 
3876   /*
3877       m - number of local rows
3878       n - number of columns (same on all processors)
3879       rstart - first row in new global matrix generated
3880   */
3881   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3882   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3883   if (call == MAT_INITIAL_MATRIX) {
3884     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3885     ii  = aij->i;
3886     jj  = aij->j;
3887 
3888     /*
3889         Determine the number of non-zeros in the diagonal and off-diagonal
3890         portions of the matrix in order to do correct preallocation
3891     */
3892 
3893     /* first get start and end of "diagonal" columns */
3894     if (csize == PETSC_DECIDE) {
3895       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3896       if (mglobal == n) { /* square matrix */
3897         nlocal = m;
3898       } else {
3899         nlocal = n/size + ((n % size) > rank);
3900       }
3901     } else {
3902       nlocal = csize;
3903     }
3904     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3905     rstart = rend - nlocal;
3906     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3907 
3908     /* next, compute all the lengths */
3909     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3910     olens = dlens + m;
3911     for (i=0; i<m; i++) {
3912       jend = ii[i+1] - ii[i];
3913       olen = 0;
3914       dlen = 0;
3915       for (j=0; j<jend; j++) {
3916         if (*jj < rstart || *jj >= rend) olen++;
3917         else dlen++;
3918         jj++;
3919       }
3920       olens[i] = olen;
3921       dlens[i] = dlen;
3922     }
3923     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3924     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3925     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3926     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3927     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3928     ierr = PetscFree(dlens);CHKERRQ(ierr);
3929   } else {
3930     PetscInt ml,nl;
3931 
3932     M    = *newmat;
3933     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3934     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3935     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3936     /*
3937          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3938        rather than the slower MatSetValues().
3939     */
3940     M->was_assembled = PETSC_TRUE;
3941     M->assembled     = PETSC_FALSE;
3942   }
3943   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3944   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3945   ii   = aij->i;
3946   jj   = aij->j;
3947   aa   = aij->a;
3948   for (i=0; i<m; i++) {
3949     row   = rstart + i;
3950     nz    = ii[i+1] - ii[i];
3951     cwork = jj;     jj += nz;
3952     vwork = aa;     aa += nz;
3953     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3954   }
3955 
3956   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3957   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3958   *newmat = M;
3959 
3960   /* save submatrix used in processor for next request */
3961   if (call ==  MAT_INITIAL_MATRIX) {
3962     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3963     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3964   }
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3969 {
3970   PetscInt       m,cstart, cend,j,nnz,i,d;
3971   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3972   const PetscInt *JJ;
3973   PetscErrorCode ierr;
3974   PetscBool      nooffprocentries;
3975 
3976   PetscFunctionBegin;
3977   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3978 
3979   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3980   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3981   m      = B->rmap->n;
3982   cstart = B->cmap->rstart;
3983   cend   = B->cmap->rend;
3984   rstart = B->rmap->rstart;
3985 
3986   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3987 
3988 #if defined(PETSC_USE_DEBUG)
3989   for (i=0; i<m; i++) {
3990     nnz = Ii[i+1]- Ii[i];
3991     JJ  = J + Ii[i];
3992     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3993     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3994     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3995   }
3996 #endif
3997 
3998   for (i=0; i<m; i++) {
3999     nnz     = Ii[i+1]- Ii[i];
4000     JJ      = J + Ii[i];
4001     nnz_max = PetscMax(nnz_max,nnz);
4002     d       = 0;
4003     for (j=0; j<nnz; j++) {
4004       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4005     }
4006     d_nnz[i] = d;
4007     o_nnz[i] = nnz - d;
4008   }
4009   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4010   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4011 
4012   for (i=0; i<m; i++) {
4013     ii   = i + rstart;
4014     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4015   }
4016   nooffprocentries    = B->nooffprocentries;
4017   B->nooffprocentries = PETSC_TRUE;
4018   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4019   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4020   B->nooffprocentries = nooffprocentries;
4021 
4022   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4023   PetscFunctionReturn(0);
4024 }
4025 
4026 /*@
4027    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4028    (the default parallel PETSc format).
4029 
4030    Collective
4031 
4032    Input Parameters:
4033 +  B - the matrix
4034 .  i - the indices into j for the start of each local row (starts with zero)
4035 .  j - the column indices for each local row (starts with zero)
4036 -  v - optional values in the matrix
4037 
4038    Level: developer
4039 
4040    Notes:
4041        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4042      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4043      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4044 
4045        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4046 
4047        The format used for the sparse matrix input is equivalent to a
4048     row-major ordering, i.e. for the following matrix, the input data expected is
4049     as shown:
4050 
4051 $        1 0 0
4052 $        2 0 3     P0
4053 $       -------
4054 $        4 5 6     P1
4055 $
4056 $     Process0 [P0]: rows_owned=[0,1]
4057 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4058 $        j =  {0,0,2}  [size = 3]
4059 $        v =  {1,2,3}  [size = 3]
4060 $
4061 $     Process1 [P1]: rows_owned=[2]
4062 $        i =  {0,3}    [size = nrow+1  = 1+1]
4063 $        j =  {0,1,2}  [size = 3]
4064 $        v =  {4,5,6}  [size = 3]
4065 
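       For illustration, a minimal sketch of how P0 above might preallocate and fill its two
    local rows with this routine (the variable names here are illustrative only):

.vb
      Mat         B;
      PetscInt    i[] = {0,1,3};        /* row offsets into j for the 2 local rows */
      PetscInt    j[] = {0,0,2};        /* global column indices                   */
      PetscScalar v[] = {1.0,2.0,3.0};  /* one value per entry of j                */

      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetSizes(B,2,PETSC_DECIDE,3,3);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);   /* preallocates, inserts the values, and assembles */
.ve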
4066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4067           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4068 @*/
4069 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4070 {
4071   PetscErrorCode ierr;
4072 
4073   PetscFunctionBegin;
4074   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4075   PetscFunctionReturn(0);
4076 }
4077 
4078 /*@C
4079    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4080    (the default parallel PETSc format).  For good matrix assembly performance
4081    the user should preallocate the matrix storage by setting the parameters
4082    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4083    performance can be increased by more than a factor of 50.
4084 
4085    Collective
4086 
4087    Input Parameters:
4088 +  B - the matrix
4089 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4090            (same value is used for all local rows)
4091 .  d_nnz - array containing the number of nonzeros in the various rows of the
4092            DIAGONAL portion of the local submatrix (possibly different for each row)
4093            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4094            The size of this array is equal to the number of local rows, i.e 'm'.
4095            For matrices that will be factored, you must leave room for (and set)
4096            the diagonal entry even if it is zero.
4097 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4098            submatrix (same value is used for all local rows).
4099 -  o_nnz - array containing the number of nonzeros in the various rows of the
4100            OFF-DIAGONAL portion of the local submatrix (possibly different for
4101            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4102            structure. The size of this array is equal to the number
4103            of local rows, i.e 'm'.
4104 
4105    If the *_nnz parameter is given then the *_nz parameter is ignored
4106 
4107    The AIJ format (also called the Yale sparse matrix format or
4108    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4109    storage.  The stored row and column indices begin with zero.
4110    See Users-Manual: ch_mat for details.
4111 
4112    The parallel matrix is partitioned such that the first m0 rows belong to
4113    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4114    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4115 
4116    The DIAGONAL portion of the local submatrix of a processor can be defined
4117    as the submatrix which is obtained by extracting the part corresponding to
4118    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4119    first row that belongs to the processor, r2 is the last row belonging to
4120    this processor, and c1-c2 is the range of indices of the local part of a
4121    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4122    common case of a square matrix, the row and column ranges are the same and
4123    the DIAGONAL part is also square. The remaining portion of the local
4124    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4125 
4126    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4127 
4128    You can call MatGetInfo() to get information on how effective the preallocation was;
4129    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4130    You can also run with the option -info and look for messages with the string
4131    malloc in them to see if additional memory allocation was needed.
4132 
4133    Example usage:
4134 
4135    Consider the following 8x8 matrix with 34 non-zero values, that is
4136    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4137    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4138    as follows:
4139 
4140 .vb
4141             1  2  0  |  0  3  0  |  0  4
4142     Proc0   0  5  6  |  7  0  0  |  8  0
4143             9  0 10  | 11  0  0  | 12  0
4144     -------------------------------------
4145            13  0 14  | 15 16 17  |  0  0
4146     Proc1   0 18  0  | 19 20 21  |  0  0
4147             0  0  0  | 22 23  0  | 24  0
4148     -------------------------------------
4149     Proc2  25 26 27  |  0  0 28  | 29  0
4150            30  0  0  | 31 32 33  |  0 34
4151 .ve
4152 
4153    This can be represented as a collection of submatrices as:
4154 
4155 .vb
4156       A B C
4157       D E F
4158       G H I
4159 .ve
4160 
4161    Where the submatrices A,B,C are owned by proc0, D,E,F are
4162    owned by proc1, G,H,I are owned by proc2.
4163 
4164    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4165    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4166    The 'M','N' parameters are 8,8, and have the same values on all procs.
4167 
4168    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4169    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4170    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4171    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4172    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4173    matrix, ans [DF] as another SeqAIJ matrix.
4174 
4175    When d_nz, o_nz parameters are specified, d_nz storage elements are
4176    allocated for every row of the local diagonal submatrix, and o_nz
4177    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4178    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4179    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4180    In this case, the values of d_nz,o_nz are:
4181 .vb
4182      proc0 : dnz = 2, o_nz = 2
4183      proc1 : dnz = 3, o_nz = 2
4184      proc2 : dnz = 1, o_nz = 4
4185 .ve
4186    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4187    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4188    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4189    34 values.
4190 
4191    When d_nnz, o_nnz parameters are specified, the storage is specified
4192    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4193    In the above case the values for d_nnz,o_nnz are:
4194 .vb
4195      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4196      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4197      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4198 .ve
4199    Here the space allocated is the sum of all the above values, i.e. 34, and
4200    hence pre-allocation is perfect.
4201 
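   As a sketch, proc0 in the example above could thus preallocate its local part in either
   of the following ways (B is assumed to already be a MATMPIAIJ with the layout shown):

.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      /* constant per-row estimates */
      MatMPIAIJSetPreallocation(B,2,NULL,2,NULL);
      /* or exact per-row counts */
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve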
4202    Level: intermediate
4203 
4204 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4205           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4206 @*/
4207 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4208 {
4209   PetscErrorCode ierr;
4210 
4211   PetscFunctionBegin;
4212   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4213   PetscValidType(B,1);
4214   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4215   PetscFunctionReturn(0);
4216 }
4217 
4218 /*@
4219      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4220          CSR format.
4221 
4222    Collective
4223 
4224    Input Parameters:
4225 +  comm - MPI communicator
4226 .  m - number of local rows (Cannot be PETSC_DECIDE)
4227 .  n - This value should be the same as the local size used in creating the
4228        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4229        calculated if N is given). For square matrices n is almost always m.
4230 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4231 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4232 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4233 .   j - column indices
4234 -   a - matrix values
4235 
4236    Output Parameter:
4237 .   mat - the matrix
4238 
4239    Level: intermediate
4240 
4241    Notes:
4242        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4243      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4244      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4245 
4246        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4247 
4248        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4249 
4250        The format used for the sparse matrix input is equivalent to a
4251     row-major ordering, i.e. for the following matrix, the input data expected is
4252     as shown:
4253 
4254 $        1 0 0
4255 $        2 0 3     P0
4256 $       -------
4257 $        4 5 6     P1
4258 $
4259 $     Process0 [P0]: rows_owned=[0,1]
4260 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4261 $        j =  {0,0,2}  [size = 3]
4262 $        v =  {1,2,3}  [size = 3]
4263 $
4264 $     Process1 [P1]: rows_owned=[2]
4265 $        i =  {0,3}    [size = nrow+1  = 1+1]
4266 $        j =  {0,1,2}  [size = 3]
4267 $        v =  {4,5,6}  [size = 3]
4268 
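       For illustration, the P0 call from the layout above might look as follows (a sketch;
    variable names are illustrative only):

.vb
      Mat         A;
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};

      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve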
4269 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4270           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4271 @*/
4272 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4273 {
4274   PetscErrorCode ierr;
4275 
4276   PetscFunctionBegin;
4277   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4278   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4279   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4280   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4281   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4282   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4283   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4284   PetscFunctionReturn(0);
4285 }
4286 
4287 /*@
4288      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4289          CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4290 
4291    Collective
4292 
4293    Input Parameters:
4294 +  mat - the matrix
4295 .  m - number of local rows (Cannot be PETSC_DECIDE)
4296 .  n - This value should be the same as the local size used in creating the
4297        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4298        calculated if N is given). For square matrices n is almost always m.
4299 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4300 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4301 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4302 .  J - column indices
4303 -  v - matrix values
4304 
4305    Level: intermediate
4306 
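   Notes:
     A hedged sketch of typical usage (using the parameter names above), assuming the matrix
     was created once with MatCreateMPIAIJWithArrays() and only the numerical values change
     afterwards:

.vb
      MatCreateMPIAIJWithArrays(comm,m,n,M,N,Ii,J,v,&A);
      /* ... recompute the entries of v, keeping Ii and J unchanged ... */
      MatUpdateMPIAIJWithArrays(A,m,n,M,N,Ii,J,v);
.ve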
4307 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4308           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4309 @*/
4310 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4311 {
4312   PetscErrorCode ierr;
4313   PetscInt       cstart,nnz,i,j;
4314   PetscInt       *ld;
4315   PetscBool      nooffprocentries;
4316   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4317   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4318   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4319   const PetscInt *Adi = Ad->i;
4320   PetscInt       ldi,Iii,md;
4321 
4322   PetscFunctionBegin;
4323   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4324   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4325   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4326   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4327 
4328   cstart = mat->cmap->rstart;
4329   if (!Aij->ld) {
4330     /* count number of entries below block diagonal */
4331     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4332     Aij->ld = ld;
4333     for (i=0; i<m; i++) {
4334       nnz  = Ii[i+1]- Ii[i];
4335       j     = 0;
4336       while (j < nnz && J[j] < cstart) {j++;}
4337       J    += nnz;
4338       ld[i] = j;
4339     }
4340   } else {
4341     ld = Aij->ld;
4342   }
4343 
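  /*
     For each local row the CSR values in v are split between the two sequential blocks:
     the first ld[i] entries (global column < cstart) go to the off-diagonal block (ao),
     the next md = Adi[i+1]-Adi[i] entries go to the diagonal block (ad), and the
     remaining nnz-ld[i]-md entries go back to the off-diagonal block.
  */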
4344   for (i=0; i<m; i++) {
4345     nnz  = Ii[i+1]- Ii[i];
4346     Iii  = Ii[i];
4347     ldi  = ld[i];
4348     md   = Adi[i+1]-Adi[i];
4349     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4350     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4351     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4352     ad  += md;
4353     ao  += nnz - md;
4354   }
4355   nooffprocentries      = mat->nooffprocentries;
4356   mat->nooffprocentries = PETSC_TRUE;
4357   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4358   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4359   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4360   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4361   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4362   mat->nooffprocentries = nooffprocentries;
4363   PetscFunctionReturn(0);
4364 }
4365 
4366 /*@C
4367    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4368    (the default parallel PETSc format).  For good matrix assembly performance
4369    the user should preallocate the matrix storage by setting the parameters
4370    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4371    performance can be increased by more than a factor of 50.
4372 
4373    Collective
4374 
4375    Input Parameters:
4376 +  comm - MPI communicator
4377 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4378            This value should be the same as the local size used in creating the
4379            y vector for the matrix-vector product y = Ax.
4380 .  n - This value should be the same as the local size used in creating the
4381        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4382        calculated if N is given). For square matrices n is almost always m.
4383 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4384 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4385 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4386            (same value is used for all local rows)
4387 .  d_nnz - array containing the number of nonzeros in the various rows of the
4388            DIAGONAL portion of the local submatrix (possibly different for each row)
4389            or NULL, if d_nz is used to specify the nonzero structure.
4390            The size of this array is equal to the number of local rows, i.e 'm'.
4391 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4392            submatrix (same value is used for all local rows).
4393 -  o_nnz - array containing the number of nonzeros in the various rows of the
4394            OFF-DIAGONAL portion of the local submatrix (possibly different for
4395            each row) or NULL, if o_nz is used to specify the nonzero
4396            structure. The size of this array is equal to the number
4397            of local rows, i.e 'm'.
4398 
4399    Output Parameter:
4400 .  A - the matrix
4401 
4402    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4403    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4404    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4405 
4406    Notes:
4407    If the *_nnz parameter is given then the *_nz parameter is ignored
4408 
4409    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4410    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4411    storage requirements for this matrix.
4412 
4413    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4414    processor then it must be used on all processors that share the object for
4415    that argument.
4416 
4417    The user MUST specify either the local or global matrix dimensions
4418    (possibly both).
4419 
4420    The parallel matrix is partitioned across processors such that the
4421    first m0 rows belong to process 0, the next m1 rows belong to
4422    process 1, the next m2 rows belong to process 2 etc.. where
4423    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4424    values corresponding to [m x N] submatrix.
4425 
4426    The columns are logically partitioned with the n0 columns belonging
4427    to 0th partition, the next n1 columns belonging to the next
4428    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4429 
4430    The DIAGONAL portion of the local submatrix on any given processor
4431    is the submatrix corresponding to the rows and columns m,n
4432    corresponding to the given processor, i.e. the diagonal matrix on
4433    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4434    etc. The remaining portion of the local submatrix [m x (N-n)]
4435    constitutes the OFF-DIAGONAL portion. The example below better
4436    illustrates this concept.
4437 
4438    For a square global matrix we define each processor's diagonal portion
4439    to be its local rows and the corresponding columns (a square submatrix);
4440    each processor's off-diagonal portion encompasses the remainder of the
4441    local matrix (a rectangular submatrix).
4442 
4443    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4444 
4445    When calling this routine with a single process communicator, a matrix of
4446    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4447    type of communicator, use the construction mechanism
4448 .vb
4449      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4450 .ve
4451 
4452 $     MatCreate(...,&A);
4453 $     MatSetType(A,MATMPIAIJ);
4454 $     MatSetSizes(A, m,n,M,N);
4455 $     MatMPIAIJSetPreallocation(A,...);
4456 
4457    By default, this format uses inodes (identical nodes) when possible.
4458    We search for consecutive rows with the same nonzero structure, thereby
4459    reusing matrix information to achieve increased efficiency.
4460 
4461    Options Database Keys:
4462 +  -mat_no_inode  - Do not use inodes
4463 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4464 
4467    Example usage:
4468 
4469    Consider the following 8x8 matrix with 34 non-zero values, that is
4470    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4471    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4472    as follows
4473 
4474 .vb
4475             1  2  0  |  0  3  0  |  0  4
4476     Proc0   0  5  6  |  7  0  0  |  8  0
4477             9  0 10  | 11  0  0  | 12  0
4478     -------------------------------------
4479            13  0 14  | 15 16 17  |  0  0
4480     Proc1   0 18  0  | 19 20 21  |  0  0
4481             0  0  0  | 22 23  0  | 24  0
4482     -------------------------------------
4483     Proc2  25 26 27  |  0  0 28  | 29  0
4484            30  0  0  | 31 32 33  |  0 34
4485 .ve
4486 
4487    This can be represented as a collection of submatrices as
4488 
4489 .vb
4490       A B C
4491       D E F
4492       G H I
4493 .ve
4494 
4495    Where the submatrices A,B,C are owned by proc0, D,E,F are
4496    owned by proc1, G,H,I are owned by proc2.
4497 
4498    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4499    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4500    The 'M','N' parameters are 8,8, and have the same values on all procs.
4501 
4502    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4503    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4504    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4505    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4506    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4507    matrix, ans [DF] as another SeqAIJ matrix.
4508 
4509    When d_nz, o_nz parameters are specified, d_nz storage elements are
4510    allocated for every row of the local diagonal submatrix, and o_nz
4511    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4512    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4513    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4514    In this case, the values of d_nz,o_nz are
4515 .vb
4516      proc0 : dnz = 2, o_nz = 2
4517      proc1 : dnz = 3, o_nz = 2
4518      proc2 : dnz = 1, o_nz = 4
4519 .ve
4520    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4521    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4522    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4523    34 values.
4524 
4525    When d_nnz, o_nnz parameters are specified, the storage is specified
4526    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4527    In the above case the values for d_nnz,o_nnz are
4528 .vb
4529      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4530      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4531      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4532 .ve
4533    Here the space allocated is the sum of all the above values, i.e. 34, and
4534    hence pre-allocation is perfect.
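   As a sketch, proc0 in the example above could thus create its share of the matrix with a
   single call (the exact per-row counts are taken from the table above; A is illustrative):

.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      Mat      A;

      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve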
4535 
4536    Level: intermediate
4537 
4538 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4539           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4540 @*/
4541 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4542 {
4543   PetscErrorCode ierr;
4544   PetscMPIInt    size;
4545 
4546   PetscFunctionBegin;
4547   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4548   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4549   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4550   if (size > 1) {
4551     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4552     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4553   } else {
4554     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4555     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4556   }
4557   PetscFunctionReturn(0);
4558 }
4559 
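/*
   MatMPIAIJGetSeqAIJ - gives access to the two sequential blocks of a MATMPIAIJ matrix:
   Ad is the "diagonal" block (columns owned by this process), Ao is the "off-diagonal"
   block (the remaining local entries, stored with compressed column numbering), and
   colmap maps the columns of Ao back to global column indices.
*/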
4560 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4561 {
4562   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4563   PetscBool      flg;
4564   PetscErrorCode ierr;
4565 
4566   PetscFunctionBegin;
4567   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4568   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4569   if (Ad)     *Ad     = a->A;
4570   if (Ao)     *Ao     = a->B;
4571   if (colmap) *colmap = a->garray;
4572   PetscFunctionReturn(0);
4573 }
4574 
4575 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4576 {
4577   PetscErrorCode ierr;
4578   PetscInt       m,N,i,rstart,nnz,Ii;
4579   PetscInt       *indx;
4580   PetscScalar    *values;
4581 
4582   PetscFunctionBegin;
4583   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4584   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4585     PetscInt       *dnz,*onz,sum,bs,cbs;
4586 
4587     if (n == PETSC_DECIDE) {
4588       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4589     }
4590     /* Check sum(n) = N */
4591     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4592     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4593 
4594     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4595     rstart -= m;
4596 
4597     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4598     for (i=0; i<m; i++) {
4599       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4600       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4601       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4602     }
4603 
4604     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4605     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4606     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4607     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4608     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4609     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4610     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4611     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4612   }
4613 
4614   /* numeric phase */
4615   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4616   for (i=0; i<m; i++) {
4617     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4618     Ii   = i + rstart;
4619     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4620     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4621   }
4622   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4623   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4624   PetscFunctionReturn(0);
4625 }
4626 
4627 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4628 {
4629   PetscErrorCode    ierr;
4630   PetscMPIInt       rank;
4631   PetscInt          m,N,i,rstart,nnz;
4632   size_t            len;
4633   const PetscInt    *indx;
4634   PetscViewer       out;
4635   char              *name;
4636   Mat               B;
4637   const PetscScalar *values;
4638 
4639   PetscFunctionBegin;
4640   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4641   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4642   /* Should this be the type of the diagonal block of A? */
4643   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4644   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4645   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4646   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4647   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4648   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4649   for (i=0; i<m; i++) {
4650     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4651     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4652     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4653   }
4654   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4655   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4656 
4657   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4658   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4659   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* leave room for the ".<rank>" suffix */
4660   sprintf(name,"%s.%d",outfile,rank);
4661   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4662   ierr = PetscFree(name);CHKERRQ(ierr);
4663   ierr = MatView(B,out);CHKERRQ(ierr);
4664   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4665   ierr = MatDestroy(&B);CHKERRQ(ierr);
4666   PetscFunctionReturn(0);
4667 }
4668 
4669 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4670 {
4671   PetscErrorCode      ierr;
4672   Mat_Merge_SeqsToMPI *merge;
4673   PetscContainer      container;
4674 
4675   PetscFunctionBegin;
4676   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4677   if (container) {
4678     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4679     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4680     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4681     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4682     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4683     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4684     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4685     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4686     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4687     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4688     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4689     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4690     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4691     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4692     ierr = PetscFree(merge);CHKERRQ(ierr);
4693     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4694   }
4695   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4696   PetscFunctionReturn(0);
4697 }
4698 
4699 #include <../src/mat/utils/freespace.h>
4700 #include <petscbt.h>
4701 
4702 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4703 {
4704   PetscErrorCode      ierr;
4705   MPI_Comm            comm;
4706   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4707   PetscMPIInt         size,rank,taga,*len_s;
4708   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4709   PetscInt            proc,m;
4710   PetscInt            **buf_ri,**buf_rj;
4711   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4712   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4713   MPI_Request         *s_waits,*r_waits;
4714   MPI_Status          *status;
4715   MatScalar           *aa=a->a;
4716   MatScalar           **abuf_r,*ba_i;
4717   Mat_Merge_SeqsToMPI *merge;
4718   PetscContainer      container;
4719 
4720   PetscFunctionBegin;
4721   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4722   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4723 
4724   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4725   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4726 
4727   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4728   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4729 
4730   bi     = merge->bi;
4731   bj     = merge->bj;
4732   buf_ri = merge->buf_ri;
4733   buf_rj = merge->buf_rj;
4734 
4735   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4736   owners = merge->rowmap->range;
4737   len_s  = merge->len_s;
4738 
4739   /* send and recv matrix values */
4740   /*-----------------------------*/
4741   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4742   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4743 
4744   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4745   for (proc=0,k=0; proc<size; proc++) {
4746     if (!len_s[proc]) continue;
4747     i    = owners[proc];
4748     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4749     k++;
4750   }
4751 
4752   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4753   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4754   ierr = PetscFree(status);CHKERRQ(ierr);
4755 
4756   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4757   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4758 
4759   /* insert mat values of mpimat */
4760   /*----------------------------*/
4761   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4762   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4763 
4764   for (k=0; k<merge->nrecv; k++) {
4765     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4766     nrows       = *(buf_ri_k[k]);
4767     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4768     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4769   }
4770 
4771   /* set values of ba */
4772   m = merge->rowmap->n;
4773   for (i=0; i<m; i++) {
4774     arow = owners[rank] + i;
4775     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4776     bnzi = bi[i+1] - bi[i];
4777     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4778 
4779     /* add local non-zero vals of this proc's seqmat into ba */
4780     anzi   = ai[arow+1] - ai[arow];
4781     aj     = a->j + ai[arow];
4782     aa     = a->a + ai[arow];
4783     nextaj = 0;
4784     for (j=0; nextaj<anzi; j++) {
4785       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4786         ba_i[j] += aa[nextaj++];
4787       }
4788     }
4789 
4790     /* add received vals into ba */
4791     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4792       /* i-th row */
4793       if (i == *nextrow[k]) {
4794         anzi   = *(nextai[k]+1) - *nextai[k];
4795         aj     = buf_rj[k] + *(nextai[k]);
4796         aa     = abuf_r[k] + *(nextai[k]);
4797         nextaj = 0;
4798         for (j=0; nextaj<anzi; j++) {
4799           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4800             ba_i[j] += aa[nextaj++];
4801           }
4802         }
4803         nextrow[k]++; nextai[k]++;
4804       }
4805     }
4806     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4807   }
4808   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4809   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4810 
4811   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4812   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4813   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4814   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4815   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4816   PetscFunctionReturn(0);
4817 }
4818 
4819 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4820 {
4821   PetscErrorCode      ierr;
4822   Mat                 B_mpi;
4823   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4824   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4825   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4826   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4827   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4828   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4829   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4830   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4831   MPI_Status          *status;
4832   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4833   PetscBT             lnkbt;
4834   Mat_Merge_SeqsToMPI *merge;
4835   PetscContainer      container;
4836 
4837   PetscFunctionBegin;
4838   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4839 
4840   /* make sure it is a PETSc comm */
4841   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4842   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4843   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4844 
4845   ierr = PetscNew(&merge);CHKERRQ(ierr);
4846   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4847 
4848   /* determine row ownership */
4849   /*---------------------------------------------------------*/
4850   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4851   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4852   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4853   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4854   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4855   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4856   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4857 
4858   m      = merge->rowmap->n;
4859   owners = merge->rowmap->range;
4860 
4861   /* determine the number of messages to send, their lengths */
4862   /*---------------------------------------------------------*/
4863   len_s = merge->len_s;
4864 
4865   len          = 0; /* length of buf_si[] */
4866   merge->nsend = 0;
4867   for (proc=0; proc<size; proc++) {
4868     len_si[proc] = 0;
4869     if (proc == rank) {
4870       len_s[proc] = 0;
4871     } else {
4872       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4873       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4874     }
4875     if (len_s[proc]) {
4876       merge->nsend++;
4877       nrows = 0;
4878       for (i=owners[proc]; i<owners[proc+1]; i++) {
4879         if (ai[i+1] > ai[i]) nrows++;
4880       }
4881       len_si[proc] = 2*(nrows+1);
4882       len         += len_si[proc];
4883     }
4884   }
4885 
4886   /* determine the number and length of messages to receive for ij-structure */
4887   /*-------------------------------------------------------------------------*/
4888   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4889   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4890 
4891   /* post the Irecv of j-structure */
4892   /*-------------------------------*/
4893   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4894   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4895 
4896   /* post the Isend of j-structure */
4897   /*--------------------------------*/
4898   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4899 
4900   for (proc=0, k=0; proc<size; proc++) {
4901     if (!len_s[proc]) continue;
4902     i    = owners[proc];
4903     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4904     k++;
4905   }
4906 
4907   /* receives and sends of j-structure are complete */
4908   /*------------------------------------------------*/
4909   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4910   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4911 
4912   /* send and recv i-structure */
4913   /*---------------------------*/
4914   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4915   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4916 
4917   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4918   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4919   for (proc=0,k=0; proc<size; proc++) {
4920     if (!len_s[proc]) continue;
4921     /* form outgoing message for i-structure:
4922          buf_si[0]:                 nrows to be sent
4923                [1:nrows]:           row index (global)
4924                [nrows+1:2*nrows+1]: i-structure index
4925     */
4926     /*-------------------------------------------*/
4927     nrows       = len_si[proc]/2 - 1;
4928     buf_si_i    = buf_si + nrows+1;
4929     buf_si[0]   = nrows;
4930     buf_si_i[0] = 0;
4931     nrows       = 0;
4932     for (i=owners[proc]; i<owners[proc+1]; i++) {
4933       anzi = ai[i+1] - ai[i];
4934       if (anzi) {
4935         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4936         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4937         nrows++;
4938       }
4939     }
4940     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4941     k++;
4942     buf_si += len_si[proc];
4943   }
4944 
4945   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4946   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4947 
4948   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4949   for (i=0; i<merge->nrecv; i++) {
4950     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4951   }
4952 
4953   ierr = PetscFree(len_si);CHKERRQ(ierr);
4954   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4955   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4956   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4957   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4958   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4959   ierr = PetscFree(status);CHKERRQ(ierr);
4960 
4961   /* compute a local seq matrix in each processor */
4962   /*----------------------------------------------*/
4963   /* allocate bi array and free space for accumulating nonzero column info */
4964   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4965   bi[0] = 0;
4966 
4967   /* create and initialize a linked list */
4968   nlnk = N+1;
4969   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4970 
4971   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4972   len  = ai[owners[rank+1]] - ai[owners[rank]];
4973   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4974 
4975   current_space = free_space;
4976 
4977   /* determine symbolic info for each local row */
4978   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4979 
4980   for (k=0; k<merge->nrecv; k++) {
4981     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4982     nrows       = *buf_ri_k[k];
4983     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4984     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4985   }
4986 
4987   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4988   len  = 0;
4989   for (i=0; i<m; i++) {
4990     bnzi = 0;
4991     /* add local non-zero cols of this proc's seqmat into lnk */
4992     arow  = owners[rank] + i;
4993     anzi  = ai[arow+1] - ai[arow];
4994     aj    = a->j + ai[arow];
4995     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4996     bnzi += nlnk;
4997     /* add received col data into lnk */
4998     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4999       if (i == *nextrow[k]) { /* i-th row */
5000         anzi  = *(nextai[k]+1) - *nextai[k];
5001         aj    = buf_rj[k] + *nextai[k];
5002         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5003         bnzi += nlnk;
5004         nextrow[k]++; nextai[k]++;
5005       }
5006     }
5007     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5008 
5009     /* if free space is not available, make more free space */
5010     if (current_space->local_remaining<bnzi) {
5011       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5012       nspacedouble++;
5013     }
5014     /* copy data into free space, then initialize lnk */
5015     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5016     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5017 
5018     current_space->array           += bnzi;
5019     current_space->local_used      += bnzi;
5020     current_space->local_remaining -= bnzi;
5021 
5022     bi[i+1] = bi[i] + bnzi;
5023   }
5024 
5025   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5026 
5027   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5028   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5029   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5030 
5031   /* create symbolic parallel matrix B_mpi */
5032   /*---------------------------------------*/
5033   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5034   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5035   if (n==PETSC_DECIDE) {
5036     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5037   } else {
5038     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5039   }
5040   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5041   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5042   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5043   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5044   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5045 
5046   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5047   B_mpi->assembled    = PETSC_FALSE;
5048   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5049   merge->bi           = bi;
5050   merge->bj           = bj;
5051   merge->buf_ri       = buf_ri;
5052   merge->buf_rj       = buf_rj;
5053   merge->coi          = NULL;
5054   merge->coj          = NULL;
5055   merge->owners_co    = NULL;
5056 
5057   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5058 
5059   /* attach the supporting struct to B_mpi for reuse */
5060   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5061   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5062   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5063   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5064   *mpimat = B_mpi;
5065 
5066   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5067   PetscFunctionReturn(0);
5068 }
5069 
5070 /*@C
5071       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5072                  matrices from each processor
5073 
5074     Collective
5075 
5076    Input Parameters:
5077 +    comm - the communicator the parallel matrix will live on
5078 .    seqmat - the input sequential matrix on each process
5079 .    m - number of local rows (or PETSC_DECIDE)
5080 .    n - number of local columns (or PETSC_DECIDE)
5081 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5082 
5083    Output Parameter:
5084 .    mpimat - the parallel matrix generated
5085 
5086     Level: advanced
5087 
5088    Notes:
5089      The dimensions of the sequential matrix in each processor MUST be the same.
5090      The input seqmat is included in the container "Mat_Merge_SeqsToMPI" and will be
5091      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
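
   Example usage:
     A minimal sketch (illustrative names only); it assumes every process already holds an
     assembled SeqAIJ matrix Aseq of identical dimensions, whose assembly is omitted here:
.vb
     Mat            Aseq,Ampi;
     PetscErrorCode ierr;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
     ierr = MatDestroy(&Ampi);CHKERRQ(ierr);
.ve
     The second call reuses the symbolic structure of Ampi and only updates its numerical
     values, assuming the entries of Aseq changed but its nonzero pattern did not.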
5092 @*/
5093 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5094 {
5095   PetscErrorCode ierr;
5096   PetscMPIInt    size;
5097 
5098   PetscFunctionBegin;
5099   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5100   if (size == 1) {
5101     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5102     if (scall == MAT_INITIAL_MATRIX) {
5103       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5104     } else {
5105       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5106     }
5107     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5108     PetscFunctionReturn(0);
5109   }
5110   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5111   if (scall == MAT_INITIAL_MATRIX) {
5112     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5113   }
5114   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5115   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5116   PetscFunctionReturn(0);
5117 }
5118 
5119 /*@
5120      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5121           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5122           with MatGetSize()
5123 
5124     Not Collective
5125 
5126    Input Parameters:
5127 +    A - the matrix
5128 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5129 
5130    Output Parameter:
5131 .    A_loc - the local sequential matrix generated
5132 
5133     Level: developer
5134 
5135    Notes:
5136      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5137      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5138      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5139      modify the values of the returned A_loc.
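
   Example usage:
     A minimal sketch, assuming A is an assembled MATMPIAIJ matrix; A_loc is created by the
     first call and refreshed by the second one after the values of A change with an
     unchanged nonzero pattern:
.vb
     Mat            A_loc;
     PetscErrorCode ierr;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve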
5140 
5141 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5142 
5143 @*/
5144 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5145 {
5146   PetscErrorCode ierr;
5147   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5148   Mat_SeqAIJ     *mat,*a,*b;
5149   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5150   MatScalar      *aa,*ba,*cam;
5151   PetscScalar    *ca;
5152   PetscMPIInt    size;
5153   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5154   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5155   PetscBool      match;
5156 
5157   PetscFunctionBegin;
5158   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5159   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5160   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5161   if (size == 1) {
5162     if (scall == MAT_INITIAL_MATRIX) {
5163       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5164       *A_loc = mpimat->A;
5165     } else if (scall == MAT_REUSE_MATRIX) {
5166       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5167     }
5168     PetscFunctionReturn(0);
5169   }
5170 
5171   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5172   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5173   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5174   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5175   aa = a->a; ba = b->a;
5176   if (scall == MAT_INITIAL_MATRIX) {
5177     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5178     ci[0] = 0;
5179     for (i=0; i<am; i++) {
5180       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5181     }
5182     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5183     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5184     k    = 0;
5185     for (i=0; i<am; i++) {
5186       ncols_o = bi[i+1] - bi[i];
5187       ncols_d = ai[i+1] - ai[i];
5188       /* off-diagonal portion of A */
5189       for (jo=0; jo<ncols_o; jo++) {
5190         col = cmap[*bj];
5191         if (col >= cstart) break;
5192         cj[k]   = col; bj++;
5193         ca[k++] = *ba++;
5194       }
5195       /* diagonal portion of A */
5196       for (j=0; j<ncols_d; j++) {
5197         cj[k]   = cstart + *aj++;
5198         ca[k++] = *aa++;
5199       }
5200       /* off-diagonal portion of A */
5201       for (j=jo; j<ncols_o; j++) {
5202         cj[k]   = cmap[*bj++];
5203         ca[k++] = *ba++;
5204       }
5205     }
5206     /* put together the new matrix */
5207     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5208     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5209     /* Since these are PETSc arrays, change flags to free them as necessary. */
5210     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5211     mat->free_a  = PETSC_TRUE;
5212     mat->free_ij = PETSC_TRUE;
5213     mat->nonew   = 0;
5214   } else if (scall == MAT_REUSE_MATRIX) {
5215     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5216     ci = mat->i; cj = mat->j; cam = mat->a;
5217     for (i=0; i<am; i++) {
5218       /* off-diagonal portion of A */
5219       ncols_o = bi[i+1] - bi[i];
5220       for (jo=0; jo<ncols_o; jo++) {
5221         col = cmap[*bj];
5222         if (col >= cstart) break;
5223         *cam++ = *ba++; bj++;
5224       }
5225       /* diagonal portion of A */
5226       ncols_d = ai[i+1] - ai[i];
5227       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5228       /* off-diagonal portion of A */
5229       for (j=jo; j<ncols_o; j++) {
5230         *cam++ = *ba++; bj++;
5231       }
5232     }
5233   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5234   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5235   PetscFunctionReturn(0);
5236 }
5237 
5238 /*@C
5239      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5240 
5241     Not Collective
5242 
5243    Input Parameters:
5244 +    A - the matrix
5245 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5246 -    row, col - index sets of rows and columns to extract (or NULL)
5247 
5248    Output Parameter:
5249 .    A_loc - the local sequential matrix generated
5250 
5251     Level: developer
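
   Example usage:
     A minimal sketch, assuming A is an assembled MATMPIAIJ matrix; passing NULL for row and
     col selects all local rows and all the nonzero columns:
.vb
     Mat            A_loc;
     PetscErrorCode ierr;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve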
5252 
5253 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5254 
5255 @*/
5256 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5257 {
5258   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5259   PetscErrorCode ierr;
5260   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5261   IS             isrowa,iscola;
5262   Mat            *aloc;
5263   PetscBool      match;
5264 
5265   PetscFunctionBegin;
5266   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5267   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5268   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5269   if (!row) {
5270     start = A->rmap->rstart; end = A->rmap->rend;
5271     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5272   } else {
5273     isrowa = *row;
5274   }
5275   if (!col) {
5276     start = A->cmap->rstart;
5277     cmap  = a->garray;
5278     nzA   = a->A->cmap->n;
5279     nzB   = a->B->cmap->n;
5280     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5281     ncols = 0;
5282     for (i=0; i<nzB; i++) {
5283       if (cmap[i] < start) idx[ncols++] = cmap[i];
5284       else break;
5285     }
5286     imark = i;
5287     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5288     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5289     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5290   } else {
5291     iscola = *col;
5292   }
5293   if (scall != MAT_INITIAL_MATRIX) {
5294     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5295     aloc[0] = *A_loc;
5296   }
5297   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5298   if (!col) { /* attach global id of condensed columns */
5299     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5300   }
5301   *A_loc = aloc[0];
5302   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5303   if (!row) {
5304     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5305   }
5306   if (!col) {
5307     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5308   }
5309   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5310   PetscFunctionReturn(0);
5311 }
5312 
5313 /*
5314  * Destroy a mat that may be composed with PetscSF communication objects.
5315  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5316  * */
5317 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5318 {
5319   PetscSF          sf,osf;
5320   IS               map;
5321   PetscErrorCode   ierr;
5322 
5323   PetscFunctionBegin;
5324   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5325   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5326   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5327   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5328   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5329   ierr = ISDestroy(&map);CHKERRQ(ierr);
5330   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5331   PetscFunctionReturn(0);
5332 }
5333 
5334 /*
5335  * Create a sequential AIJ matrix based on row indices: all the nonzero columns of a row are extracted once that row is matched.
5336  * A row could be local or remote. The routine is designed to be scalable in memory, so that nothing it allocates depends
5337  * on the global matrix size.
5338  * */
5339 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5340 {
5341   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5342   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5343   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5344   PetscMPIInt              owner;
5345   PetscSFNode              *iremote,*oiremote;
5346   const PetscInt           *lrowindices;
5347   PetscErrorCode           ierr;
5348   PetscSF                  sf,osf;
5349   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5350   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5351   MPI_Comm                 comm;
5352   ISLocalToGlobalMapping   mapping;
5353 
5354   PetscFunctionBegin;
5355   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5356   /* plocalsize is the number of roots
5357    * nrows is the number of leaves
5358    * */
5359   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5360   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5361   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5362   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5363   for (i=0;i<nrows;i++) {
5364     /* Find a remote index and an owner for a row
5365      * The row could be local or remote
5366      * */
5367     owner = 0;
5368     lidx  = 0;
5369     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5370     iremote[i].index = lidx;
5371     iremote[i].rank  = owner;
5372   }
5373   /* Create SF to communicate how many nonzero columns for each row */
5374   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5375    * SF will figure out the number of nonzero columns for each row, and their
5376    * offsets
5377    * */
5378   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5379   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5380   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5381 
5382   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5383   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5384   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5385   roffsets[0] = 0;
5386   roffsets[1] = 0;
5387   for (i=0;i<plocalsize;i++) {
5388     /* diag */
5389     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5390     /* off diag */
5391     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5392     /* compute offsets so that we know the relative location of each row */
5393     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5394     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5395   }
5396   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5397   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5398   /* 'r' means root, and 'l' means leaf */
5399   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5400   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5401   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5402   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5403   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5404   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5405   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5406   dntotalcols = 0;
5407   ontotalcols = 0;
5408   ncol = 0;
5409   for (i=0;i<nrows;i++) {
5410     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5411     ncol = PetscMax(pnnz[i],ncol);
5412     /* diag */
5413     dntotalcols += nlcols[i*2+0];
5414     /* off diag */
5415     ontotalcols += nlcols[i*2+1];
5416   }
5417   /* We do not need to figure out the exact number of columns
5418    * since all the calculations will be done by going through the raw data
5419    * */
5420   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5421   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5422   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5423   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5424   /* diag */
5425   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5426   /* off diag */
5427   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5428   /* diag */
5429   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5430   /* off diag */
5431   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5432   dntotalcols = 0;
5433   ontotalcols = 0;
5434   ntotalcols  = 0;
5435   for (i=0;i<nrows;i++) {
5436     owner = 0;
5437     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5438     /* Set iremote for diag matrix */
5439     for (j=0;j<nlcols[i*2+0];j++) {
5440       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5441       iremote[dntotalcols].rank    = owner;
5442       /* P_oth is SeqAIJ so ilocal needs to point to the first part of memory */
5443       ilocal[dntotalcols++]        = ntotalcols++;
5444     }
5445     /* off diag */
5446     for (j=0;j<nlcols[i*2+1];j++) {
5447       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5448       oiremote[ontotalcols].rank    = owner;
5449       oilocal[ontotalcols++]        = ntotalcols++;
5450     }
5451   }
5452   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5453   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5454   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5455   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5456   /* P serves as the roots and P_oth as the leaves
5457    * Diag matrix
5458    * */
5459   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5460   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5461   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5462 
5463   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5464   /* Off diag */
5465   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5466   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5467   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5468   /* We operate on the matrix internal data to save memory */
5469   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5470   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5471   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5472   /* Convert to global indices for diag matrix */
5473   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5474   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5475   /* We want P_oth to store global indices */
5476   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5477   /* Use memory scalable approach */
5478   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5479   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5480   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5481   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5482   /* Convert back to local indices */
5483   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5484   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5485   nout = 0;
5486   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5487   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D \n",po->i[plocalsize],nout);
5488   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5489   /* Exchange values */
5490   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5491   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5492   /* Stop PETSc from shrinking memory */
5493   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5494   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5495   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5496   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5497   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5498   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5499   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5500   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5501   PetscFunctionReturn(0);
5502 }
5503 
5504 /*
5505  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
5506  * This supports MPIAIJ and MAIJ.
5507  * */
5508 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5509 {
5510   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5511   Mat_SeqAIJ            *p_oth;
5512   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5513   IS                    rows,map;
5514   PetscHMapI            hamp;
5515   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5516   MPI_Comm              comm;
5517   PetscSF               sf,osf;
5518   PetscBool             has;
5519   PetscErrorCode        ierr;
5520 
5521   PetscFunctionBegin;
5522   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5523   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5524   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5525    *  and then create a submatrix (that often is an overlapping matrix)
5526    * */
5527   if (reuse==MAT_INITIAL_MATRIX) {
5528     /* Use a hash table to figure out unique keys */
5529     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5530     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5531     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5532     count = 0;
5533     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5534     for (i=0;i<a->B->cmap->n;i++) {
5535       key  = a->garray[i]/dof;
5536       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5537       if (!has) {
5538         mapping[i] = count;
5539         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5540       } else {
5541         /* Current 'i' has the same value as in the previous step */
5542         mapping[i] = count-1;
5543       }
5544     }
5545     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5546     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5547     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5548     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5549     off = 0;
5550     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5551     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5552     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5553     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5554     /* In case the matrix was already created but the user wants to recreate it */
5555     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5556     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5557     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5558     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5559   } else if (reuse==MAT_REUSE_MATRIX) {
5560     /* If the matrix was already created, we simply update the values using the SF objects
5561      * that were attached to the matrix earlier.
5562      *  */
5563     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5564     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5565     if (!sf || !osf) {
5566       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5567     }
5568     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5569     /* Update values in place */
5570     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5571     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5572     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5573     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5574   } else {
5575     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5576   }
5577   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5578   PetscFunctionReturn(0);
5579 }
5580 
5581 /*@C
5582     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5583 
5584     Collective on Mat
5585 
5586    Input Parameters:
5587 +    A,B - the matrices in mpiaij format
5588 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5589 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5590 
5591    Output Parameters:
5592 +    rowb, colb - index sets of rows and columns of B to extract
5593 -    B_seq - the sequential matrix generated
5594 
5595     Level: developer
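
   Example usage:
     A minimal sketch, assuming A and B are assembled MATMPIAIJ matrices with compatible
     layouts; the index sets created by the first call are reused by the second one to
     refresh B_seq after the values of B change:
.vb
     IS             rowb = NULL,colb = NULL;
     Mat            B_seq = NULL;
     PetscErrorCode ierr;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve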
5596 
5597 @*/
5598 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5599 {
5600   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5601   PetscErrorCode ierr;
5602   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5603   IS             isrowb,iscolb;
5604   Mat            *bseq=NULL;
5605 
5606   PetscFunctionBegin;
5607   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5608     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5609   }
5610   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5611 
5612   if (scall == MAT_INITIAL_MATRIX) {
5613     start = A->cmap->rstart;
5614     cmap  = a->garray;
5615     nzA   = a->A->cmap->n;
5616     nzB   = a->B->cmap->n;
5617     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5618     ncols = 0;
5619     for (i=0; i<nzB; i++) {  /* row < local row index */
5620       if (cmap[i] < start) idx[ncols++] = cmap[i];
5621       else break;
5622     }
5623     imark = i;
5624     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5625     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5626     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5627     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5628   } else {
5629     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5630     isrowb  = *rowb; iscolb = *colb;
5631     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5632     bseq[0] = *B_seq;
5633   }
5634   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5635   *B_seq = bseq[0];
5636   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5637   if (!rowb) {
5638     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5639   } else {
5640     *rowb = isrowb;
5641   }
5642   if (!colb) {
5643     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5644   } else {
5645     *colb = iscolb;
5646   }
5647   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5648   PetscFunctionReturn(0);
5649 }
5650 
5651 /*
5652     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5653     of the OFF-DIAGONAL portion of local A
5654 
5655     Collective on Mat
5656 
5657    Input Parameters:
5658 +    A,B - the matrices in mpiaij format
5659 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5660 
5661    Output Parameters:
5662 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5663 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5664 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5665 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5666 
5667     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5668      for this matrix. This is not desirable.
5669 
5670     Level: developer
5671 
5672 */
5673 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5674 {
5675   PetscErrorCode         ierr;
5676   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5677   Mat_SeqAIJ             *b_oth;
5678   VecScatter             ctx;
5679   MPI_Comm               comm;
5680   const PetscMPIInt      *rprocs,*sprocs;
5681   const PetscInt         *srow,*rstarts,*sstarts;
5682   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5683   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5684   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5685   MPI_Request            *rwaits = NULL,*swaits = NULL;
5686   MPI_Status             rstatus;
5687   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5688 
5689   PetscFunctionBegin;
5690   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5691   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5692 
5693   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5694     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5695   }
5696   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5697   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5698 
5699   if (size == 1) {
5700     if (startsj_s) *startsj_s = NULL;
5701     if (bufa_ptr)  *bufa_ptr  = NULL;
5702     *B_oth    = NULL;
5703     PetscFunctionReturn(0);
5704   }
5705 
5706   ctx = a->Mvctx;
5707   tag = ((PetscObject)ctx)->tag;
5708 
5709   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5710   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5711   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5712   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5713   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5714   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5715   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5716 
5717   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5718   if (scall == MAT_INITIAL_MATRIX) {
5719     /* i-array */
5720     /*---------*/
5721     /*  post receives */
5722     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5723     for (i=0; i<nrecvs; i++) {
5724       rowlen = rvalues + rstarts[i]*rbs;
5725       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5726       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5727     }
5728 
5729     /* pack the outgoing message */
5730     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5731 
5732     sstartsj[0] = 0;
5733     rstartsj[0] = 0;
5734     len         = 0; /* total length of j or a array to be sent */
5735     if (nsends) {
5736       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5737       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5738     }
5739     for (i=0; i<nsends; i++) {
5740       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5741       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5742       for (j=0; j<nrows; j++) {
5743         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5744         for (l=0; l<sbs; l++) {
5745           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5746 
5747           rowlen[j*sbs+l] = ncols;
5748 
5749           len += ncols;
5750           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5751         }
5752         k++;
5753       }
5754       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5755 
5756       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5757     }
5758     /* recvs and sends of i-array are completed */
5759     i = nrecvs;
5760     while (i--) {
5761       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5762     }
5763     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5764     ierr = PetscFree(svalues);CHKERRQ(ierr);
5765 
5766     /* allocate buffers for sending j and a arrays */
5767     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5768     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5769 
5770     /* create i-array of B_oth */
5771     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5772 
5773     b_othi[0] = 0;
5774     len       = 0; /* total length of j or a array to be received */
5775     k         = 0;
5776     for (i=0; i<nrecvs; i++) {
5777       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5778       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5779       for (j=0; j<nrows; j++) {
5780         b_othi[k+1] = b_othi[k] + rowlen[j];
5781         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5782         k++;
5783       }
5784       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5785     }
5786     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5787 
5788     /* allocate space for the j and a arrays of B_oth */
5789     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5790     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5791 
5792     /* j-array */
5793     /*---------*/
5794     /*  post receives of j-array */
5795     for (i=0; i<nrecvs; i++) {
5796       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5797       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5798     }
5799 
5800     /* pack the outgoing message j-array */
5801     if (nsends) k = sstarts[0];
5802     for (i=0; i<nsends; i++) {
5803       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5804       bufJ  = bufj+sstartsj[i];
5805       for (j=0; j<nrows; j++) {
5806         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5807         for (ll=0; ll<sbs; ll++) {
5808           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5809           for (l=0; l<ncols; l++) {
5810             *bufJ++ = cols[l];
5811           }
5812           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5813         }
5814       }
5815       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5816     }
5817 
5818     /* recvs and sends of j-array are completed */
5819     i = nrecvs;
5820     while (i--) {
5821       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5822     }
5823     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5824   } else if (scall == MAT_REUSE_MATRIX) {
5825     sstartsj = *startsj_s;
5826     rstartsj = *startsj_r;
5827     bufa     = *bufa_ptr;
5828     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5829     b_otha   = b_oth->a;
5830   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5831 
5832   /* a-array */
5833   /*---------*/
5834   /*  post receives of a-array */
5835   for (i=0; i<nrecvs; i++) {
5836     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5837     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5838   }
5839 
5840   /* pack the outgoing message a-array */
5841   if (nsends) k = sstarts[0];
5842   for (i=0; i<nsends; i++) {
5843     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5844     bufA  = bufa+sstartsj[i];
5845     for (j=0; j<nrows; j++) {
5846       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5847       for (ll=0; ll<sbs; ll++) {
5848         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5849         for (l=0; l<ncols; l++) {
5850           *bufA++ = vals[l];
5851         }
5852         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5853       }
5854     }
5855     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5856   }
5857   /* recvs and sends of a-array are completed */
5858   i = nrecvs;
5859   while (i--) {
5860     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5861   }
5862   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5863   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5864 
5865   if (scall == MAT_INITIAL_MATRIX) {
5866     /* put together the new matrix */
5867     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5868 
5869     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5870     /* Since these are PETSc arrays, change flags to free them as necessary. */
5871     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5872     b_oth->free_a  = PETSC_TRUE;
5873     b_oth->free_ij = PETSC_TRUE;
5874     b_oth->nonew   = 0;
5875 
5876     ierr = PetscFree(bufj);CHKERRQ(ierr);
5877     if (!startsj_s || !bufa_ptr) {
5878       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5879       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5880     } else {
5881       *startsj_s = sstartsj;
5882       *startsj_r = rstartsj;
5883       *bufa_ptr  = bufa;
5884     }
5885   }
5886 
5887   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5888   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5889   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5890   PetscFunctionReturn(0);
5891 }
5892 
5893 /*@C
5894   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5895 
5896   Not Collective
5897 
5898   Input Parameter:
5899 . A - The matrix in mpiaij format
5900 
5901   Output Parameters:
5902 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5903 . colmap - A map from global column index to local index into lvec
5904 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5905 
5906   Level: developer
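
  Example usage:
    A minimal sketch, assuming A is an assembled MATMPIAIJ matrix; the type of colmap
    depends on whether PETSc was configured with ctable support:
.vb
    Vec            lvec;
    VecScatter     Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable     colmap;
#else
    PetscInt       *colmap;
#endif
    PetscErrorCode ierr;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
    The returned objects are internal to the matrix and must not be destroyed by the caller.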
5907 
5908 @*/
5909 #if defined(PETSC_USE_CTABLE)
5910 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5911 #else
5912 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5913 #endif
5914 {
5915   Mat_MPIAIJ *a;
5916 
5917   PetscFunctionBegin;
5918   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5919   PetscValidPointer(lvec, 2);
5920   PetscValidPointer(colmap, 3);
5921   PetscValidPointer(multScatter, 4);
5922   a = (Mat_MPIAIJ*) A->data;
5923   if (lvec) *lvec = a->lvec;
5924   if (colmap) *colmap = a->colmap;
5925   if (multScatter) *multScatter = a->Mvctx;
5926   PetscFunctionReturn(0);
5927 }
5928 
5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5932 #if defined(PETSC_HAVE_MKL_SPARSE)
5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5934 #endif
5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5937 #if defined(PETSC_HAVE_ELEMENTAL)
5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5939 #endif
5940 #if defined(PETSC_HAVE_HYPRE)
5941 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5942 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5943 #endif
5944 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5946 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5947 
5948 /*
5949     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5950 
5951                n                       p                          p
5952         (              )       (              )         (                  )
5953       m (      A       )  *  n (       B      )   =   m (         C        )
5954         (              )       (              )         (                  )
5955 
5956 */
5957 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5958 {
5959   PetscErrorCode ierr;
5960   Mat            At,Bt,Ct;
5961 
5962   PetscFunctionBegin;
5963   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5964   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5965   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5966   ierr = MatDestroy(&At);CHKERRQ(ierr);
5967   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5968   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5969   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5970   PetscFunctionReturn(0);
5971 }
5972 
5973 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5974 {
5975   PetscErrorCode ierr;
5976   PetscInt       m=A->rmap->n,n=B->cmap->n;
5977   Mat            Cmat;
5978 
5979   PetscFunctionBegin;
5980   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D\n",A->cmap->n,B->rmap->n);
5981   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5982   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5983   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5984   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5985   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5986   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5987   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5988 
5989   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5990 
5991   *C = Cmat;
5992   PetscFunctionReturn(0);
5993 }
5994 
5995 /* ----------------------------------------------------------------*/
5996 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5997 {
5998   PetscErrorCode ierr;
5999 
6000   PetscFunctionBegin;
6001   if (scall == MAT_INITIAL_MATRIX) {
6002     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
6003     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
6004     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
6005   }
6006   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
6007   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
6008   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
6009   PetscFunctionReturn(0);
6010 }
6011 
6012 /*MC
6013    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6014 
6015    Options Database Keys:
6016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6017 
6018    Level: beginner
6019 
6020    Notes:
6021     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6022     in this case the values associated with the rows and columns one passes in are set to zero
6023     in the matrix
6024 
6025     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6026     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
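
    Example usage:
      A minimal creation sketch; the global size 100 and the preallocation numbers are
      illustrative only:
.vb
      Mat            A;
      PetscErrorCode ierr;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
      Alternatively, call MatSetFromOptions() after MatSetSizes() and select the type at
      run time with -mat_type mpiaij.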
6027 
6028 .seealso: MatCreateAIJ()
6029 M*/
6030 
6031 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6032 {
6033   Mat_MPIAIJ     *b;
6034   PetscErrorCode ierr;
6035   PetscMPIInt    size;
6036 
6037   PetscFunctionBegin;
6038   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6039 
6040   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6041   B->data       = (void*)b;
6042   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6043   B->assembled  = PETSC_FALSE;
6044   B->insertmode = NOT_SET_VALUES;
6045   b->size       = size;
6046 
6047   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6048 
6049   /* build cache for off-processor entries formed */
6050   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6051 
6052   b->donotstash  = PETSC_FALSE;
6053   b->colmap      = 0;
6054   b->garray      = 0;
6055   b->roworiented = PETSC_TRUE;
6056 
6057   /* stuff used for matrix vector multiply */
6058   b->lvec  = NULL;
6059   b->Mvctx = NULL;
6060 
6061   /* stuff for MatGetRow() */
6062   b->rowindices   = 0;
6063   b->rowvalues    = 0;
6064   b->getrowactive = PETSC_FALSE;
6065 
6066   /* flexible pointer used in CUSP/CUSPARSE classes */
6067   b->spptr = NULL;
6068 
6069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6070   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6072   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6076   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6079 #if defined(PETSC_HAVE_MKL_SPARSE)
6080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6081 #endif
6082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6085 #if defined(PETSC_HAVE_ELEMENTAL)
6086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6087 #endif
6088 #if defined(PETSC_HAVE_HYPRE)
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6090 #endif
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6096 #if defined(PETSC_HAVE_HYPRE)
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6098 #endif
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6100   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6101   PetscFunctionReturn(0);
6102 }
6103 
6104 /*@C
6105      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6106          and "off-diagonal" part of the matrix in CSR format.
6107 
6108    Collective
6109 
6110    Input Parameters:
6111 +  comm - MPI communicator
6112 .  m - number of local rows (Cannot be PETSC_DECIDE)
6113 .  n - This value should be the same as the local size used in creating the
6114        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6115        calculated if N is given). For square matrices n is almost always m.
6116 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6117 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6118 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6119 .   j - column indices
6120 .   a - matrix values
6121 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6122 .   oj - column indices
6123 -   oa - matrix values
6124 
6125    Output Parameter:
6126 .   mat - the matrix
6127 
6128    Level: advanced
6129 
6130    Notes:
6131        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6132        must free the arrays once the matrix has been destroyed and not before.
6133 
6134        The i and j indices are 0 based
6135 
6136        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6137 
6138        This sets local rows and cannot be used to set off-processor values.
6139 
6140        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6141        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6142        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6143        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6144        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6145        communication if it is known that only local entries will be set.
6146 
6147 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6148           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6149 @*/
6150 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6151 {
6152   PetscErrorCode ierr;
6153   Mat_MPIAIJ     *maij;
6154 
6155   PetscFunctionBegin;
6156   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6157   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6158   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6159   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6160   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6161   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6162   maij = (Mat_MPIAIJ*) (*mat)->data;
6163 
6164   (*mat)->preallocated = PETSC_TRUE;
6165 
6166   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6167   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6168 
6169   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6170   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6171 
6172   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6173   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6174   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6175   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6176 
6177   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6178   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6179   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6180   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6181   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6182   PetscFunctionReturn(0);
6183 }
6184 
6185 /*
6186     Special version for direct calls from Fortran
6187 */
6188 #include <petsc/private/fortranimpl.h>
6189 
6190 /* Change these macros so they can be used in a void function */
6191 #undef CHKERRQ
6192 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6193 #undef SETERRQ2
6194 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6195 #undef SETERRQ3
6196 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6197 #undef SETERRQ
6198 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6199 
6200 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6201 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6202 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6203 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6204 #else
6205 #endif
6206 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6207 {
6208   Mat            mat  = *mmat;
6209   PetscInt       m    = *mm, n = *mn;
6210   InsertMode     addv = *maddv;
6211   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6212   PetscScalar    value;
6213   PetscErrorCode ierr;
6214 
6215   MatCheckPreallocated(mat,1);
6216   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6217 
6218 #if defined(PETSC_USE_DEBUG)
6219   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6220 #endif
6221   {
6222     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6223     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6224     PetscBool roworiented = aij->roworiented;
6225 
6226     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros used below */
6227     Mat        A                    = aij->A;
6228     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6229     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6230     MatScalar  *aa                  = a->a;
6231     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6232     Mat        B                    = aij->B;
6233     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6234     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6235     MatScalar  *ba                  = b->a;
6236     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6237      * cannot use "#if defined" inside a macro. */
6238     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6239 
6240     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6241     PetscInt  nonew = a->nonew;
6242     MatScalar *ap1,*ap2;
6243 
6244     PetscFunctionBegin;
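    /* Loop over the rows being set: locally owned rows are inserted directly into the diagonal (A) and off-process (B) blocks, while rows owned by other ranks are stashed and communicated at assembly time */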
6245     for (i=0; i<m; i++) {
6246       if (im[i] < 0) continue;
6247 #if defined(PETSC_USE_DEBUG)
6248       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6249 #endif
6250       if (im[i] >= rstart && im[i] < rend) {
6251         row      = im[i] - rstart;
6252         lastcol1 = -1;
6253         rp1      = aj + ai[row];
6254         ap1      = aa + ai[row];
6255         rmax1    = aimax[row];
6256         nrow1    = ailen[row];
6257         low1     = 0;
6258         high1    = nrow1;
6259         lastcol2 = -1;
6260         rp2      = bj + bi[row];
6261         ap2      = ba + bi[row];
6262         rmax2    = bimax[row];
6263         nrow2    = bilen[row];
6264         low2     = 0;
6265         high2    = nrow2;
6266 
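        /* Insert the entries of this locally owned row: columns in the local diagonal range [cstart,cend) go into A, all other columns into B */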
6267         for (j=0; j<n; j++) {
6268           if (roworiented) value = v[i*n+j];
6269           else value = v[i+j*m];
6270           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6271           if (in[j] >= cstart && in[j] < cend) {
6272             col = in[j] - cstart;
6273             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6275             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6276 #endif
6277           } else if (in[j] < 0) continue;
6278 #if defined(PETSC_USE_DEBUG)
6279           /* the extra braces around SETERRQ2() are required for --with-errorchecking=0 because of the 'else' clause that follows */
6280           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6281 #endif
6282           else {
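            /* Off-process column: after a previous assembly B uses a compacted local column numbering, so translate the global index through colmap; if no slot exists yet (and new nonzeros are allowed) disassemble B back to global numbering */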
6283             if (mat->was_assembled) {
6284               if (!aij->colmap) {
6285                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6286               }
6287 #if defined(PETSC_USE_CTABLE)
6288               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6289               col--;
6290 #else
6291               col = aij->colmap[in[j]] - 1;
6292 #endif
6293               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6294                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6295                 col  =  in[j];
6296                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6297                 B        = aij->B;
6298                 b        = (Mat_SeqAIJ*)B->data;
6299                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6300                 rp2      = bj + bi[row];
6301                 ap2      = ba + bi[row];
6302                 rmax2    = bimax[row];
6303                 nrow2    = bilen[row];
6304                 low2     = 0;
6305                 high2    = nrow2;
6306                 bm       = aij->B->rmap->n;
6307                 ba       = b->a;
6308                 inserted = PETSC_FALSE;
6309               }
6310             } else col = in[j];
6311             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6312 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6313             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6314 #endif
6315           }
6316         }
6317       } else if (!aij->donotstash) {
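        /* The row belongs to another rank: stash the values so they can be sent to the owning process during MatAssemblyBegin()/MatAssemblyEnd() */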
6318         if (roworiented) {
6319           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6320         } else {
6321           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6322         }
6323       }
6324     }
6325   }
6326   PetscFunctionReturnVoid();
6327 }
6328