xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1fd50544974b13f3c620afb9f00a06d0c04fb4fd)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The AIJ type also automatically switches over to use inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
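/*
   A minimal usage sketch (not part of this file; the global size n and the per-row
   nonzero estimates 5 and 2 are illustrative assumptions, shown only to demonstrate
   calling both preallocation routines as recommended above):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);           used on a one-process communicator
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);    used on a multi-process communicator
*/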
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
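/*
   A sketch of selecting this type at run time (assumes the application calls
   MatSetFromOptions() on the matrix; the executable name is illustrative):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and run with:  ./app -mat_type aijcrl
*/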
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
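/*
   A hedged calling sketch (not from this file): gmat is a MATSEQAIJ matrix significant
   only on rank 0, and m is the number of rows the calling rank is to own (the local
   row counts must sum to the global number of rows of gmat):

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);

   A later call with MAT_REUSE_MATRIX and the same &dmat moves over only the new
   numerical values from rank 0; the nonzero structure must be unchanged.
*/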
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to the other processes */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal nonzeros */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal nonzeros */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each processor
425 has an order-N integer array, although access is fast).
426 */
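/*
   Lookup sketch, mirroring the use of colmap in MatSetValues_MPIAIJ() below
   (gcol is a global column index expected to lie in the off-diagonal part):

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                            stored shifted by one so that 0 means "not present"
   #else
     lcol = aij->colmap[gcol] - 1;      becomes -1 when gcol is not an off-diagonal column here
   #endif
*/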
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
584 #endif
585     if (im[i] >= rstart && im[i] < rend) {
586       row      = im[i] - rstart;
587       lastcol1 = -1;
588       rp1      = aj + ai[row];
589       ap1      = aa + ai[row];
590       rmax1    = aimax[row];
591       nrow1    = ailen[row];
592       low1     = 0;
593       high1    = nrow1;
594       lastcol2 = -1;
595       rp2      = bj + bi[row];
596       ap2      = ba + bi[row];
597       rmax2    = bimax[row];
598       nrow2    = bilen[row];
599       low2     = 0;
600       high2    = nrow2;
601 
602       for (j=0; j<n; j++) {
603         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
604         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
605         if (in[j] >= cstart && in[j] < cend) {
606           col   = in[j] - cstart;
607           nonew = a->nonew;
608           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
610           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
611 #endif
612         } else if (in[j] < 0) continue;
613 #if defined(PETSC_USE_DEBUG)
614         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
615 #endif
616         else {
617           if (mat->was_assembled) {
618             if (!aij->colmap) {
619               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
620             }
621 #if defined(PETSC_USE_CTABLE)
622             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
623             col--;
624 #else
625             col = aij->colmap[in[j]] - 1;
626 #endif
627             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
628               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
629               col  =  in[j];
630               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
631               B        = aij->B;
632               b        = (Mat_SeqAIJ*)B->data;
633               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
634               rp2      = bj + bi[row];
635               ap2      = ba + bi[row];
636               rmax2    = bimax[row];
637               nrow2    = bilen[row];
638               low2     = 0;
639               high2    = nrow2;
640               bm       = aij->B->rmap->n;
641               ba       = b->a;
642               inserted = PETSC_FALSE;
643             } else if (col < 0) {
644               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
645                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
646               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
647             }
648           } else col = in[j];
649           nonew = b->nonew;
650           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
652           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
653 #endif
654         }
655       }
656     } else {
657       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
658       if (!aij->donotstash) {
659         mat->assembled = PETSC_FALSE;
660         if (roworiented) {
661           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
662         } else {
663           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
664         }
665       }
666     }
667   }
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
673     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
674     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
675 */
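/*
   Illustrative input (an assumption for exposition, not taken from a caller): a rank
   owning two local rows with cstart = 0 and cend = 2, so global column 2 falls in the
   off-diagonal part:

     mat_i = {0, 2, 3}        row 0 has two entries, row 1 has one
     mat_j = {0, 2, 1}        column indices sorted within each row, as required above
*/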
676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
677 {
678   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
679   Mat            A           = aij->A; /* diagonal part of the matrix */
680   Mat            B           = aij->B; /* offdiagonal part of the matrix */
681   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
682   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
683   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
684   PetscInt       *ailen      = a->ilen,*aj = a->j;
685   PetscInt       *bilen      = b->ilen,*bj = b->j;
686   PetscInt       am          = aij->A->rmap->n,j;
687   PetscInt       diag_so_far = 0,dnz;
688   PetscInt       offd_so_far = 0,onz;
689 
690   PetscFunctionBegin;
691   /* Iterate over all rows of the matrix */
692   for (j=0; j<am; j++) {
693     dnz = onz = 0;
694     /*  Iterate over all non-zero columns of the current row */
695     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
696       /* If column is in the diagonal */
697       if (mat_j[col] >= cstart && mat_j[col] < cend) {
698         aj[diag_so_far++] = mat_j[col] - cstart;
699         dnz++;
700       } else { /* off-diagonal entries */
701         bj[offd_so_far++] = mat_j[col];
702         onz++;
703       }
704     }
705     ailen[j] = dnz;
706     bilen[j] = onz;
707   }
708   PetscFunctionReturn(0);
709 }
710 
711 /*
712     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
713     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
714     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
715     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
716     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
717 */
718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
719 {
720   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
721   Mat            A      = aij->A; /* diagonal part of the matrix */
722   Mat            B      = aij->B; /* offdiagonal part of the matrix */
723   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
724   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
725   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
726   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
727   PetscInt       *ailen = a->ilen,*aj = a->j;
728   PetscInt       *bilen = b->ilen,*bj = b->j;
729   PetscInt       am     = aij->A->rmap->n,j;
730   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
731   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
732   PetscScalar    *aa = a->a,*ba = b->a;
733 
734   PetscFunctionBegin;
735   /* Iterate over all rows of the matrix */
736   for (j=0; j<am; j++) {
737     dnz_row = onz_row = 0;
738     rowstart_offd = full_offd_i[j];
739     rowstart_diag = full_diag_i[j];
740     /*  Iterate over all non-zero columns of the current row */
741     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
742       /* If column is in the diagonal */
743       if (mat_j[col] >= cstart && mat_j[col] < cend) {
744         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
745         aa[rowstart_diag+dnz_row] = mat_a[col];
746         dnz_row++;
747       } else { /* off-diagonal entries */
748         bj[rowstart_offd+onz_row] = mat_j[col];
749         ba[rowstart_offd+onz_row] = mat_a[col];
750         onz_row++;
751       }
752     }
753     ailen[j] = dnz_row;
754     bilen[j] = onz_row;
755   }
756   PetscFunctionReturn(0);
757 }
758 
759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
760 {
761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
762   PetscErrorCode ierr;
763   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
764   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
765 
766   PetscFunctionBegin;
767   for (i=0; i<m; i++) {
768     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
769     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
770     if (idxm[i] >= rstart && idxm[i] < rend) {
771       row = idxm[i] - rstart;
772       for (j=0; j<n; j++) {
773         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
774         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
775         if (idxn[j] >= cstart && idxn[j] < cend) {
776           col  = idxn[j] - cstart;
777           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
778         } else {
779           if (!aij->colmap) {
780             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
781           }
782 #if defined(PETSC_USE_CTABLE)
783           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
784           col--;
785 #else
786           col = aij->colmap[idxn[j]] - 1;
787 #endif
788           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
789           else {
790             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
791           }
792         }
793       }
794     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
800 
801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   PetscErrorCode ierr;
805   PetscInt       nstash,reallocs;
806 
807   PetscFunctionBegin;
808   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
812   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
817 {
818   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
819   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
820   PetscErrorCode ierr;
821   PetscMPIInt    n;
822   PetscInt       i,j,rstart,ncols,flg;
823   PetscInt       *row,*col;
824   PetscBool      other_disassembled;
825   PetscScalar    *val;
826 
827   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
828 
829   PetscFunctionBegin;
830   if (!aij->donotstash && !mat->nooffprocentries) {
831     while (1) {
832       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
833       if (!flg) break;
834 
835       for (i=0; i<n; ) {
836         /* Now identify the consecutive vals belonging to the same row */
837         for (j=i,rstart=row[j]; j<n; j++) {
838           if (row[j] != rstart) break;
839         }
840         if (j < n) ncols = j-i;
841         else       ncols = n-i;
842         /* Now assemble all these values with a single function call */
843         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
844 
845         i = j;
846       }
847     }
848     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
849   }
850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
851   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled; if so, we must
857      also disassemble ourselves, in order that we may reassemble. */
858   /*
859      if the nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled, thus we can skip this step
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = 0;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layout don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113 
1114   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1116   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1122 {
1123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135   VecScatter     Mvctx = a->Mvctx;
1136 
1137   PetscFunctionBegin;
1138   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1139   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1141   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1149   PetscErrorCode ierr;
1150 
1151   PetscFunctionBegin;
1152   /* do nondiagonal part */
1153   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1154   /* do local part */
1155   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1156   /* add partial results together */
1157   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1163 {
1164   MPI_Comm       comm;
1165   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1166   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1167   IS             Me,Notme;
1168   PetscErrorCode ierr;
1169   PetscInt       M,N,first,last,*notme,i;
1170   PetscBool      lf;
1171   PetscMPIInt    size;
1172 
1173   PetscFunctionBegin;
1174   /* Easy test: symmetric diagonal block */
1175   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1176   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1177   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1178   if (!*f) PetscFunctionReturn(0);
1179   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1181   if (size == 1) PetscFunctionReturn(0);
1182 
1183   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1184   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1185   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1186   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1187   for (i=0; i<first; i++) notme[i] = i;
1188   for (i=last; i<M; i++) notme[i-last+first] = i;
1189   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1190   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1191   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1192   Aoff = Aoffs[0];
1193   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1194   Boff = Boffs[0];
1195   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1197   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1199   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1200   ierr = PetscFree(notme);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1205 {
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1214 {
1215   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1216   PetscErrorCode ierr;
1217 
1218   PetscFunctionBegin;
1219   /* do nondiagonal part */
1220   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1221   /* do local part */
1222   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1223   /* add partial results together */
1224   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 /*
1230   This only works correctly for square matrices where the subblock A->A is the
1231    diagonal block
1232 */
1233 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1234 {
1235   PetscErrorCode ierr;
1236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1237 
1238   PetscFunctionBegin;
1239   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1240   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1241   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1246 {
1247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1252   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1257 {
1258   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262 #if defined(PETSC_USE_LOG)
1263   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1264 #endif
1265   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1266   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1268   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1269 #if defined(PETSC_USE_CTABLE)
1270   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1271 #else
1272   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1273 #endif
1274   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1275   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1276   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1277   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1278   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1279   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1280   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1281 
1282   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1292 #if defined(PETSC_HAVE_ELEMENTAL)
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1294 #endif
1295 #if defined(PETSC_HAVE_HYPRE)
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1298 #endif
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1305 {
1306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1307   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1308   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1309   PetscErrorCode ierr;
1310   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1311   int            fd;
1312   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1313   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1314   PetscScalar    *column_values;
1315   PetscInt       message_count,flowcontrolcount;
1316   FILE           *file;
1317 
1318   PetscFunctionBegin;
1319   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1320   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1321   nz   = A->nz + B->nz;
1322   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1323   if (!rank) {
1324     header[0] = MAT_FILE_CLASSID;
1325     header[1] = mat->rmap->N;
1326     header[2] = mat->cmap->N;
1327 
1328     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1329     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     /* get largest number of rows any processor has */
1331     rlen  = mat->rmap->n;
1332     range = mat->rmap->range;
1333     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1334   } else {
1335     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336     rlen = mat->rmap->n;
1337   }
1338 
1339   /* load up the local row counts */
1340   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1341   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1342 
1343   /* store the row lengths to the file */
1344   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1345   if (!rank) {
1346     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1347     for (i=1; i<size; i++) {
1348       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1349       rlen = range[i+1] - range[i];
1350       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1351       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     }
1353     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1354   } else {
1355     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1356     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1357     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1358   }
1359   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1360 
1361   /* load up the local column indices */
1362   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1363   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1364   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1365   cnt   = 0;
1366   for (i=0; i<mat->rmap->n; i++) {
1367     for (j=B->i[i]; j<B->i[i+1]; j++) {
1368       if ((col = garray[B->j[j]]) > cstart) break;
1369       column_indices[cnt++] = col;
1370     }
1371     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1372     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1373   }
1374   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1375 
1376   /* store the column indices to the file */
1377   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1378   if (!rank) {
1379     MPI_Status status;
1380     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1381     for (i=1; i<size; i++) {
1382       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1383       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1384       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1385       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1386       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1387     }
1388     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1389   } else {
1390     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1391     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1392     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1393     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1394   }
1395   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1396 
1397   /* load up the local column values */
1398   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1399   cnt  = 0;
1400   for (i=0; i<mat->rmap->n; i++) {
1401     for (j=B->i[i]; j<B->i[i+1]; j++) {
1402       if (garray[B->j[j]] > cstart) break;
1403       column_values[cnt++] = B->a[j];
1404     }
1405     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1406     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1407   }
1408   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1409 
1410   /* store the column values to the file */
1411   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1412   if (!rank) {
1413     MPI_Status status;
1414     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1415     for (i=1; i<size; i++) {
1416       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1417       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1418       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1419       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1420       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1421     }
1422     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1423   } else {
1424     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1425     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1426     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1427     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1428   }
1429   ierr = PetscFree(column_values);CHKERRQ(ierr);
1430 
1431   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1432   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1433   PetscFunctionReturn(0);
1434 }
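
/*
   Usage sketch (illustrative): the write path above is what runs when a parallel AIJ matrix is
   viewed with a binary viewer; the file can be read back with MatLoad().  The file name and the
   matrices A and B are assumptions of the sketch, and ierr is a PetscErrorCode in scope.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/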
1435 
1436 #include <petscdraw.h>
1437 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1438 {
1439   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1440   PetscErrorCode    ierr;
1441   PetscMPIInt       rank = aij->rank,size = aij->size;
1442   PetscBool         isdraw,iascii,isbinary;
1443   PetscViewer       sviewer;
1444   PetscViewerFormat format;
1445 
1446   PetscFunctionBegin;
1447   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1450   if (iascii) {
1451     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1452     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1453       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1454       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1455       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1456       for (i=0; i<(PetscInt)size; i++) {
1457         nmax = PetscMax(nmax,nz[i]);
1458         nmin = PetscMin(nmin,nz[i]);
1459         navg += nz[i];
1460       }
1461       ierr = PetscFree(nz);CHKERRQ(ierr);
1462       navg = navg/size;
1463       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     }
1466     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1467     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1468       MatInfo   info;
1469       PetscBool inodes;
1470 
1471       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1472       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1473       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1475       if (!inodes) {
1476         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1477                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1478       } else {
1479         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1480                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1481       }
1482       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1483       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1484       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1485       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1486       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1488       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1489       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1490       PetscFunctionReturn(0);
1491     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1492       PetscInt inodecount,inodelimit,*inodes;
1493       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1494       if (inodes) {
1495         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1496       } else {
1497         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1498       }
1499       PetscFunctionReturn(0);
1500     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1501       PetscFunctionReturn(0);
1502     }
1503   } else if (isbinary) {
1504     if (size == 1) {
1505       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1506       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1507     } else {
1508       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1509     }
1510     PetscFunctionReturn(0);
1511   } else if (iascii && size == 1) {
1512     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1513     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1514     PetscFunctionReturn(0);
1515   } else if (isdraw) {
1516     PetscDraw draw;
1517     PetscBool isnull;
1518     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1519     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1520     if (isnull) PetscFunctionReturn(0);
1521   }
1522 
1523   { /* assemble the entire matrix onto first processor */
1524     Mat A = NULL, Av;
1525     IS  isrow,iscol;
1526 
1527     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1528     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1529     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1530     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1531 /*  The commented code uses MatCreateSubMatrices instead */
1532 /*
1533     Mat *AA, A = NULL, Av;
1534     IS  isrow,iscol;
1535 
1536     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1537     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1538     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1539     if (!rank) {
1540        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1541        A    = AA[0];
1542        Av   = AA[0];
1543     }
1544     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1545 */
1546     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1547     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1548     /*
1549        Every process has to participate in viewing the matrix since the graphics waits are
1550        synchronized across all processes that share the PetscDraw object
1551     */
1552     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1553     if (!rank) {
1554       if (((PetscObject)mat)->name) {
1555         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1556       }
1557       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1558     }
1559     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1560     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1561     ierr = MatDestroy(&A);CHKERRQ(ierr);
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1567 {
1568   PetscErrorCode ierr;
1569   PetscBool      iascii,isdraw,issocket,isbinary;
1570 
1571   PetscFunctionBegin;
1572   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1573   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1574   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1575   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1576   if (iascii || isdraw || isbinary || issocket) {
1577     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1578   }
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1583 {
1584   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1585   PetscErrorCode ierr;
1586   Vec            bb1 = 0;
1587   PetscBool      hasop;
1588 
1589   PetscFunctionBegin;
1590   if (flag == SOR_APPLY_UPPER) {
1591     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1592     PetscFunctionReturn(0);
1593   }
1594 
1595   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1596     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1597   }
1598 
1599   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1600     if (flag & SOR_ZERO_INITIAL_GUESS) {
1601       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602       its--;
1603     }
1604 
1605     while (its--) {
1606       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608 
1609       /* update rhs: bb1 = bb - B*x */
1610       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1611       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1612 
1613       /* local sweep */
1614       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1615     }
1616   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1617     if (flag & SOR_ZERO_INITIAL_GUESS) {
1618       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1619       its--;
1620     }
1621     while (its--) {
1622       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1623       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624 
1625       /* update rhs: bb1 = bb - B*x */
1626       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1627       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1628 
1629       /* local sweep */
1630       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1631     }
1632   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1633     if (flag & SOR_ZERO_INITIAL_GUESS) {
1634       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1635       its--;
1636     }
1637     while (its--) {
1638       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1639       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640 
1641       /* update rhs: bb1 = bb - B*x */
1642       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1643       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1644 
1645       /* local sweep */
1646       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1647     }
1648   } else if (flag & SOR_EISENSTAT) {
1649     Vec xx1;
1650 
1651     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1652     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1653 
1654     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1655     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656     if (!mat->diag) {
1657       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1658       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1659     }
1660     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1661     if (hasop) {
1662       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1663     } else {
1664       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1665     }
1666     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1667 
1668     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1669 
1670     /* local sweep */
1671     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1672     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1673     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1674   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1675 
1676   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1677 
1678   matin->factorerrortype = mat->A->factorerrortype;
1679   PetscFunctionReturn(0);
1680 }
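
/*
   Usage sketch (illustrative): since a true parallel SOR is not supported (see the error above),
   the local sweeps are normally reached through PCSOR, for example as a smoother.  The KSP ksp
   and the PetscErrorCode ierr are assumed to exist in the caller.

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetOmega(pc,1.2);CHKERRQ(ierr);           relaxation factor omega
     ierr = PCSORSetIterations(pc,1,2);CHKERRQ(ierr);      its = 1 outer, lits = 2 local iterations
*/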
1681 
1682 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1683 {
1684   Mat            aA,aB,Aperm;
1685   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1686   PetscScalar    *aa,*ba;
1687   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1688   PetscSF        rowsf,sf;
1689   IS             parcolp = NULL;
1690   PetscBool      done;
1691   PetscErrorCode ierr;
1692 
1693   PetscFunctionBegin;
1694   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1695   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1696   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1697   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1698 
1699   /* Invert row permutation to find out where my rows should go */
1700   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1701   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1702   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1703   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1704   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1705   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1706 
1707   /* Invert column permutation to find out where my columns should go */
1708   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1709   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1710   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1711   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1712   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1713   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1714   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1715 
1716   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1717   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1718   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1719 
1720   /* Find out where my gcols should go */
1721   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1722   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1723   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1724   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1725   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1726   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1727   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1728   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1729 
1730   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1731   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1732   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1733   for (i=0; i<m; i++) {
1734     PetscInt    row = rdest[i];
1735     PetscMPIInt rowner;
1736     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1737     for (j=ai[i]; j<ai[i+1]; j++) {
1738       PetscInt    col = cdest[aj[j]];
1739       PetscMPIInt cowner;
1740       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1741       if (rowner == cowner) dnnz[i]++;
1742       else onnz[i]++;
1743     }
1744     for (j=bi[i]; j<bi[i+1]; j++) {
1745       PetscInt    col = gcdest[bj[j]];
1746       PetscMPIInt cowner;
1747       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1748       if (rowner == cowner) dnnz[i]++;
1749       else onnz[i]++;
1750     }
1751   }
1752   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1753   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1754   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1755   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1756   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1757 
1758   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1759   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1760   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1761   for (i=0; i<m; i++) {
1762     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1763     PetscInt j0,rowlen;
1764     rowlen = ai[i+1] - ai[i];
1765     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so set the values in batches of at most m */
1766       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1767       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1768     }
1769     rowlen = bi[i+1] - bi[i];
1770     for (j0=j=0; j<rowlen; j0=j) {
1771       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1772       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1773     }
1774   }
1775   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1776   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1777   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1778   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1779   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1780   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1781   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1782   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1783   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1784   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1785   *B = Aperm;
1786   PetscFunctionReturn(0);
1787 }
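
/*
   Usage sketch (illustrative): permuting a parallel AIJ matrix with user-supplied index sets.
   The arrays rowperm[] and colperm[] and the length nlocal are hypothetical caller data; each
   process supplies its part of the global row and column permutations (see the MatPermute()
   man page for the exact convention).

     IS  isrow,iscol;
     Mat Aperm;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocal,rowperm,PETSC_COPY_VALUES,&isrow);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocal,colperm,PETSC_COPY_VALUES,&iscol);CHKERRQ(ierr);
     ierr = MatPermute(A,isrow,iscol,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/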
1788 
1789 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1790 {
1791   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1792   PetscErrorCode ierr;
1793 
1794   PetscFunctionBegin;
1795   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1796   if (ghosts) *ghosts = aij->garray;
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1801 {
1802   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1803   Mat            A    = mat->A,B = mat->B;
1804   PetscErrorCode ierr;
1805   PetscLogDouble isend[5],irecv[5];
1806 
1807   PetscFunctionBegin;
1808   info->block_size = 1.0;
1809   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1810 
1811   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1812   isend[3] = info->memory;  isend[4] = info->mallocs;
1813 
1814   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1815 
1816   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1817   isend[3] += info->memory;  isend[4] += info->mallocs;
1818   if (flag == MAT_LOCAL) {
1819     info->nz_used      = isend[0];
1820     info->nz_allocated = isend[1];
1821     info->nz_unneeded  = isend[2];
1822     info->memory       = isend[3];
1823     info->mallocs      = isend[4];
1824   } else if (flag == MAT_GLOBAL_MAX) {
1825     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1826 
1827     info->nz_used      = irecv[0];
1828     info->nz_allocated = irecv[1];
1829     info->nz_unneeded  = irecv[2];
1830     info->memory       = irecv[3];
1831     info->mallocs      = irecv[4];
1832   } else if (flag == MAT_GLOBAL_SUM) {
1833     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1834 
1835     info->nz_used      = irecv[0];
1836     info->nz_allocated = irecv[1];
1837     info->nz_unneeded  = irecv[2];
1838     info->memory       = irecv[3];
1839     info->mallocs      = irecv[4];
1840   }
1841   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1842   info->fill_ratio_needed = 0;
1843   info->factor_mallocs    = 0;
1844   PetscFunctionReturn(0);
1845 }
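
/*
   Usage sketch (illustrative): querying matrix statistics; with MAT_GLOBAL_SUM the fields are
   reduced over the communicator as in the routine above.  A is an assumed matrix and ierr a
   PetscErrorCode in scope; the MatInfo fields are PetscLogDoubles, hence the %g format.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g  nz allocated %g  mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/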
1846 
1847 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1848 {
1849   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1850   PetscErrorCode ierr;
1851 
1852   PetscFunctionBegin;
1853   switch (op) {
1854   case MAT_NEW_NONZERO_LOCATIONS:
1855   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1856   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1857   case MAT_KEEP_NONZERO_PATTERN:
1858   case MAT_NEW_NONZERO_LOCATION_ERR:
1859   case MAT_USE_INODES:
1860   case MAT_IGNORE_ZERO_ENTRIES:
1861     MatCheckPreallocated(A,1);
1862     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1863     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1864     break;
1865   case MAT_ROW_ORIENTED:
1866     MatCheckPreallocated(A,1);
1867     a->roworiented = flg;
1868 
1869     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1870     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1871     break;
1872   case MAT_NEW_DIAGONALS:
1873   case MAT_SORTED_FULL:
1874     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1875     break;
1876   case MAT_IGNORE_OFF_PROC_ENTRIES:
1877     a->donotstash = flg;
1878     break;
1879   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1880   case MAT_SPD:
1881   case MAT_SYMMETRIC:
1882   case MAT_STRUCTURALLY_SYMMETRIC:
1883   case MAT_HERMITIAN:
1884   case MAT_SYMMETRY_ETERNAL:
1885     break;
1886   case MAT_SUBMAT_SINGLEIS:
1887     A->submat_singleis = flg;
1888     break;
1889   case MAT_STRUCTURE_ONLY:
1890     /* The option is handled directly by MatSetOption() */
1891     break;
1892   default:
1893     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1899 {
1900   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1901   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1902   PetscErrorCode ierr;
1903   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1904   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1905   PetscInt       *cmap,*idx_p;
1906 
1907   PetscFunctionBegin;
1908   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1909   mat->getrowactive = PETSC_TRUE;
1910 
1911   if (!mat->rowvalues && (idx || v)) {
1912     /*
1913         allocate enough space to hold information from the longest row.
1914     */
1915     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1916     PetscInt   max = 1,tmp;
1917     for (i=0; i<matin->rmap->n; i++) {
1918       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1919       if (max < tmp) max = tmp;
1920     }
1921     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1922   }
1923 
1924   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1925   lrow = row - rstart;
1926 
1927   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1928   if (!v)   {pvA = 0; pvB = 0;}
1929   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1930   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1931   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1932   nztot = nzA + nzB;
1933 
1934   cmap = mat->garray;
1935   if (v  || idx) {
1936     if (nztot) {
1937       /* Sort by increasing column numbers, assuming A and B already sorted */
1938       PetscInt imark = -1;
1939       if (v) {
1940         *v = v_p = mat->rowvalues;
1941         for (i=0; i<nzB; i++) {
1942           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1943           else break;
1944         }
1945         imark = i;
1946         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1947         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1948       }
1949       if (idx) {
1950         *idx = idx_p = mat->rowindices;
1951         if (imark > -1) {
1952           for (i=0; i<imark; i++) {
1953             idx_p[i] = cmap[cworkB[i]];
1954           }
1955         } else {
1956           for (i=0; i<nzB; i++) {
1957             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1958             else break;
1959           }
1960           imark = i;
1961         }
1962         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1963         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1964       }
1965     } else {
1966       if (idx) *idx = 0;
1967       if (v)   *v   = 0;
1968     }
1969   }
1970   *nz  = nztot;
1971   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1972   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1973   PetscFunctionReturn(0);
1974 }
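
/*
   Usage sketch (illustrative): iterating over the locally owned rows of a parallel matrix A.
   MatGetRow() may only be called for local rows (enforced by the range check above) and each
   call must be matched by MatRestoreRow(); ierr is an assumed PetscErrorCode.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...  use cols[0..ncols-1] and vals[0..ncols-1]  ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/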
1975 
1976 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1977 {
1978   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1979 
1980   PetscFunctionBegin;
1981   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1982   aij->getrowactive = PETSC_FALSE;
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1987 {
1988   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1989   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1990   PetscErrorCode ierr;
1991   PetscInt       i,j,cstart = mat->cmap->rstart;
1992   PetscReal      sum = 0.0;
1993   MatScalar      *v;
1994 
1995   PetscFunctionBegin;
1996   if (aij->size == 1) {
1997     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1998   } else {
1999     if (type == NORM_FROBENIUS) {
2000       v = amat->a;
2001       for (i=0; i<amat->nz; i++) {
2002         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2003       }
2004       v = bmat->a;
2005       for (i=0; i<bmat->nz; i++) {
2006         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2007       }
2008       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2009       *norm = PetscSqrtReal(*norm);
2010       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
2011     } else if (type == NORM_1) { /* max column norm */
2012       PetscReal *tmp,*tmp2;
2013       PetscInt  *jj,*garray = aij->garray;
2014       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
2015       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
2016       *norm = 0.0;
2017       v     = amat->a; jj = amat->j;
2018       for (j=0; j<amat->nz; j++) {
2019         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2020       }
2021       v = bmat->a; jj = bmat->j;
2022       for (j=0; j<bmat->nz; j++) {
2023         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2024       }
2025       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2026       for (j=0; j<mat->cmap->N; j++) {
2027         if (tmp2[j] > *norm) *norm = tmp2[j];
2028       }
2029       ierr = PetscFree(tmp);CHKERRQ(ierr);
2030       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2031       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2032     } else if (type == NORM_INFINITY) { /* max row norm */
2033       PetscReal ntemp = 0.0;
2034       for (j=0; j<aij->A->rmap->n; j++) {
2035         v   = amat->a + amat->i[j];
2036         sum = 0.0;
2037         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2038           sum += PetscAbsScalar(*v); v++;
2039         }
2040         v = bmat->a + bmat->i[j];
2041         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2042           sum += PetscAbsScalar(*v); v++;
2043         }
2044         if (sum > ntemp) ntemp = sum;
2045       }
2046       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2047       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2048     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2049   }
2050   PetscFunctionReturn(0);
2051 }
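
/*
   Usage sketch (illustrative): the norms supported for MATMPIAIJ, matching the branches above
   (the two norm is not supported).  A is an assumed matrix and ierr a PetscErrorCode in scope.

     PetscReal nrm1,nrmf,nrminf;
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);             largest column sum of absolute values
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);     square root of the sum of squares
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);    largest row sum of absolute values
*/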
2052 
2053 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2054 {
2055   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2056   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2057   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2058   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2059   PetscErrorCode  ierr;
2060   Mat             B,A_diag,*B_diag;
2061   const MatScalar *array;
2062 
2063   PetscFunctionBegin;
2064   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2065   ai = Aloc->i; aj = Aloc->j;
2066   bi = Bloc->i; bj = Bloc->j;
2067   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2068     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2069     PetscSFNode          *oloc;
2070     PETSC_UNUSED PetscSF sf;
2071 
2072     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2073     /* compute d_nnz for preallocation */
2074     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2075     for (i=0; i<ai[ma]; i++) {
2076       d_nnz[aj[i]]++;
2077     }
2078     /* compute local off-diagonal contributions */
2079     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2080     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2081     /* map those to global */
2082     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2083     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2084     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2085     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2086     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2087     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2088     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2089 
2090     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2091     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2092     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2093     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2094     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2095     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2096   } else {
2097     B    = *matout;
2098     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2099   }
2100 
2101   b           = (Mat_MPIAIJ*)B->data;
2102   A_diag      = a->A;
2103   B_diag      = &b->A;
2104   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2105   A_diag_ncol = A_diag->cmap->N;
2106   B_diag_ilen = sub_B_diag->ilen;
2107   B_diag_i    = sub_B_diag->i;
2108 
2109   /* Set ilen for diagonal of B */
2110   for (i=0; i<A_diag_ncol; i++) {
2111     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2112   }
2113 
2114   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2115   very quickly (without using MatSetValues()) because all writes are local. */
2116   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2117 
2118   /* copy over the B part */
2119   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2120   array = Bloc->a;
2121   row   = A->rmap->rstart;
2122   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2123   cols_tmp = cols;
2124   for (i=0; i<mb; i++) {
2125     ncol = bi[i+1]-bi[i];
2126     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2127     row++;
2128     array += ncol; cols_tmp += ncol;
2129   }
2130   ierr = PetscFree(cols);CHKERRQ(ierr);
2131 
2132   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2133   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2134   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2135     *matout = B;
2136   } else {
2137     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2138   }
2139   PetscFunctionReturn(0);
2140 }
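
/*
   Usage sketch (illustrative): the reuse modes handled above, for an assumed parallel AIJ matrix A.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    create At = A^T
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);      refill At after A's values changed
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);     replace A by A^T
*/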
2141 
2142 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2143 {
2144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2145   Mat            a    = aij->A,b = aij->B;
2146   PetscErrorCode ierr;
2147   PetscInt       s1,s2,s3;
2148 
2149   PetscFunctionBegin;
2150   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2151   if (rr) {
2152     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2153     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2154     /* Overlap communication with computation. */
2155     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2156   }
2157   if (ll) {
2158     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2159     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2160     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2161   }
2162   /* scale  the diagonal block */
2163   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2164 
2165   if (rr) {
2166     /* Do a scatter end and then right scale the off-diagonal block */
2167     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2168     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2169   }
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2174 {
2175   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2176   PetscErrorCode ierr;
2177 
2178   PetscFunctionBegin;
2179   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2184 {
2185   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2186   Mat            a,b,c,d;
2187   PetscBool      flg;
2188   PetscErrorCode ierr;
2189 
2190   PetscFunctionBegin;
2191   a = matA->A; b = matA->B;
2192   c = matB->A; d = matB->B;
2193 
2194   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2195   if (flg) {
2196     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2197   }
2198   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2203 {
2204   PetscErrorCode ierr;
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2207 
2208   PetscFunctionBegin;
2209   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2210   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2211     /* because of the column compression in the off-processor part of the matrix a->B,
2212        the number of columns in a->B and b->B may be different, hence we cannot call
2213        MatCopy() directly on the two parts. If need be, a copy more efficient than
2214        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2215        then copying the submatrices */
2216     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2217   } else {
2218     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2219     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2220   }
2221   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2226 {
2227   PetscErrorCode ierr;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 /*
2235    Computes the number of nonzeros per row needed for preallocation when X and Y
2236    have different nonzero structure.
2237 */
2238 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2239 {
2240   PetscInt       i,j,k,nzx,nzy;
2241 
2242   PetscFunctionBegin;
2243   /* Set the number of nonzeros in the new matrix */
2244   for (i=0; i<m; i++) {
2245     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2246     nzx = xi[i+1] - xi[i];
2247     nzy = yi[i+1] - yi[i];
2248     nnz[i] = 0;
2249     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2250       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2251       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2252       nnz[i]++;
2253     }
2254     for (; k<nzy; k++) nnz[i]++;
2255   }
2256   PetscFunctionReturn(0);
2257 }
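
/*
   Worked example for the merge above: if row i of X has global columns {0,3,5} and row i of Y
   has global columns {3,4}, the union pattern is {0,3,4,5}, so nnz[i] = 4; the shared column 3
   is counted only once by the "Skip duplicate" branch.
*/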
2258 
2259 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2260 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2261 {
2262   PetscErrorCode ierr;
2263   PetscInt       m = Y->rmap->N;
2264   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2265   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2273 {
2274   PetscErrorCode ierr;
2275   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2276   PetscBLASInt   bnz,one=1;
2277   Mat_SeqAIJ     *x,*y;
2278 
2279   PetscFunctionBegin;
2280   if (str == SAME_NONZERO_PATTERN) {
2281     PetscScalar alpha = a;
2282     x    = (Mat_SeqAIJ*)xx->A->data;
2283     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2284     y    = (Mat_SeqAIJ*)yy->A->data;
2285     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2286     x    = (Mat_SeqAIJ*)xx->B->data;
2287     y    = (Mat_SeqAIJ*)yy->B->data;
2288     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2289     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2290     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2291     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin/End(), which updates the matrix on the GPU
2292        in those code paths; here the values were changed in place, so flag the CPU copy as current */
2293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2294     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2295       Y->offloadmask = PETSC_OFFLOAD_CPU;
2296     }
2297 #endif
2298   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
2299     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2300   } else {
2301     Mat      B;
2302     PetscInt *nnz_d,*nnz_o;
2303     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2304     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2305     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2306     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2307     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2308     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2309     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2310     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2311     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2312     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2313     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2314     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2315     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2316     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2317   }
2318   PetscFunctionReturn(0);
2319 }
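
/*
   Usage sketch (illustrative): Y = Y + a*X for parallel AIJ matrices; the MatStructure argument
   selects among the branches above.  Y, X and ierr are assumed to exist in the caller.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);         fastest, BLAS axpy on the stored values
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);       X's pattern is contained in Y's
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);    Y is rebuilt with the union pattern
*/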
2320 
2321 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2322 
2323 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2324 {
2325 #if defined(PETSC_USE_COMPLEX)
2326   PetscErrorCode ierr;
2327   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2328 
2329   PetscFunctionBegin;
2330   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2331   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2332 #else
2333   PetscFunctionBegin;
2334 #endif
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2339 {
2340   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2341   PetscErrorCode ierr;
2342 
2343   PetscFunctionBegin;
2344   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2345   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2350 {
2351   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2352   PetscErrorCode ierr;
2353 
2354   PetscFunctionBegin;
2355   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2356   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2361 {
2362   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2363   PetscErrorCode ierr;
2364   PetscInt       i,*idxb = 0;
2365   PetscScalar    *va,*vb;
2366   Vec            vtmp;
2367 
2368   PetscFunctionBegin;
2369   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2370   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2371   if (idx) {
2372     for (i=0; i<A->rmap->n; i++) {
2373       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2374     }
2375   }
2376 
2377   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2378   if (idx) {
2379     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2380   }
2381   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2382   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2383 
2384   for (i=0; i<A->rmap->n; i++) {
2385     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2386       va[i] = vb[i];
2387       if (idx) idx[i] = a->garray[idxb[i]];
2388     }
2389   }
2390 
2391   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2392   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2393   ierr = PetscFree(idxb);CHKERRQ(ierr);
2394   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2399 {
2400   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2401   PetscErrorCode ierr;
2402   PetscInt       i,*idxb = 0;
2403   PetscScalar    *va,*vb;
2404   Vec            vtmp;
2405 
2406   PetscFunctionBegin;
2407   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2408   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2409   if (idx) {
2410     for (i=0; i<A->rmap->n; i++) {
2411       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2412     }
2413   }
2414 
2415   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2416   if (idx) {
2417     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2418   }
2419   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2420   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2421 
2422   for (i=0; i<A->rmap->n; i++) {
2423     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2424       va[i] = vb[i];
2425       if (idx) idx[i] = a->garray[idxb[i]];
2426     }
2427   }
2428 
2429   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2430   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2431   ierr = PetscFree(idxb);CHKERRQ(ierr);
2432   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2437 {
2438   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2439   PetscInt       n      = A->rmap->n;
2440   PetscInt       cstart = A->cmap->rstart;
2441   PetscInt       *cmap  = mat->garray;
2442   PetscInt       *diagIdx, *offdiagIdx;
2443   Vec            diagV, offdiagV;
2444   PetscScalar    *a, *diagA, *offdiagA;
2445   PetscInt       r;
2446   PetscErrorCode ierr;
2447 
2448   PetscFunctionBegin;
2449   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2450   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2451   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2452   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2453   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2454   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2455   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2456   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2457   for (r = 0; r < n; ++r) {
2458     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2459       a[r]   = diagA[r];
2460       idx[r] = cstart + diagIdx[r];
2461     } else {
2462       a[r]   = offdiagA[r];
2463       idx[r] = cmap[offdiagIdx[r]];
2464     }
2465   }
2466   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2467   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2468   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2469   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2470   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2471   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2476 {
2477   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2478   PetscInt       n      = A->rmap->n;
2479   PetscInt       cstart = A->cmap->rstart;
2480   PetscInt       *cmap  = mat->garray;
2481   PetscInt       *diagIdx, *offdiagIdx;
2482   Vec            diagV, offdiagV;
2483   PetscScalar    *a, *diagA, *offdiagA;
2484   PetscInt       r;
2485   PetscErrorCode ierr;
2486 
2487   PetscFunctionBegin;
2488   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2489   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2490   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2491   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2492   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2493   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2494   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2495   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2496   for (r = 0; r < n; ++r) {
2497     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2498       a[r]   = diagA[r];
2499       idx[r] = cstart + diagIdx[r];
2500     } else {
2501       a[r]   = offdiagA[r];
2502       idx[r] = cmap[offdiagIdx[r]];
2503     }
2504   }
2505   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2509   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2510   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2515 {
2516   PetscErrorCode ierr;
2517   Mat            *dummy;
2518 
2519   PetscFunctionBegin;
2520   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2521   *newmat = *dummy;
2522   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2527 {
2528   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2529   PetscErrorCode ierr;
2530 
2531   PetscFunctionBegin;
2532   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2533   A->factorerrortype = a->A->factorerrortype;
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2538 {
2539   PetscErrorCode ierr;
2540   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2541 
2542   PetscFunctionBegin;
2543   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2544   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2545   if (x->assembled) {
2546     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2547   } else {
2548     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2549   }
2550   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2551   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2556 {
2557   PetscFunctionBegin;
2558   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2559   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 /*@
2564    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2565 
2566    Collective on Mat
2567 
2568    Input Parameters:
2569 +    A - the matrix
2570 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (by default it is not used)
2571 
2572  Level: advanced
2573 
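   Example usage (a minimal sketch; A is assumed to be an existing MATMPIAIJ matrix):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve
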
2574 @*/
2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2576 {
2577   PetscErrorCode       ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2585 {
2586   PetscErrorCode       ierr;
2587   PetscBool            sc = PETSC_FALSE,flg;
2588 
2589   PetscFunctionBegin;
2590   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2591   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2592   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2593   if (flg) {
2594     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2595   }
2596   ierr = PetscOptionsTail();CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
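
/*
   The database key registered above can also be set from the command line; a
   minimal sketch (the executable name ./ex1 is hypothetical):

     ./ex1 -mat_type mpiaij -mat_increase_overlap_scalable 1

   which, during MatSetFromOptions(), has the same effect as calling
   MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE).
*/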
2599 
2600 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2601 {
2602   PetscErrorCode ierr;
2603   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2604   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2605 
2606   PetscFunctionBegin;
2607   if (!Y->preallocated) {
2608     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2609   } else if (!aij->nz) {
2610     PetscInt nonew = aij->nonew;
2611     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2612     aij->nonew = nonew;
2613   }
2614   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2619 {
2620   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2621   PetscErrorCode ierr;
2622 
2623   PetscFunctionBegin;
2624   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2625   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2626   if (d) {
2627     PetscInt rstart;
2628     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2629     *d += rstart;
2630 
2631   }
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2636 {
2637   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2638   PetscErrorCode ierr;
2639 
2640   PetscFunctionBegin;
2641   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 /* -------------------------------------------------------------------*/
2646 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2647                                        MatGetRow_MPIAIJ,
2648                                        MatRestoreRow_MPIAIJ,
2649                                        MatMult_MPIAIJ,
2650                                 /* 4*/ MatMultAdd_MPIAIJ,
2651                                        MatMultTranspose_MPIAIJ,
2652                                        MatMultTransposeAdd_MPIAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                 /*10*/ 0,
2657                                        0,
2658                                        0,
2659                                        MatSOR_MPIAIJ,
2660                                        MatTranspose_MPIAIJ,
2661                                 /*15*/ MatGetInfo_MPIAIJ,
2662                                        MatEqual_MPIAIJ,
2663                                        MatGetDiagonal_MPIAIJ,
2664                                        MatDiagonalScale_MPIAIJ,
2665                                        MatNorm_MPIAIJ,
2666                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2667                                        MatAssemblyEnd_MPIAIJ,
2668                                        MatSetOption_MPIAIJ,
2669                                        MatZeroEntries_MPIAIJ,
2670                                 /*24*/ MatZeroRows_MPIAIJ,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                 /*29*/ MatSetUp_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                        MatGetDiagonalBlock_MPIAIJ,
2679                                        0,
2680                                 /*34*/ MatDuplicate_MPIAIJ,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                 /*39*/ MatAXPY_MPIAIJ,
2686                                        MatCreateSubMatrices_MPIAIJ,
2687                                        MatIncreaseOverlap_MPIAIJ,
2688                                        MatGetValues_MPIAIJ,
2689                                        MatCopy_MPIAIJ,
2690                                 /*44*/ MatGetRowMax_MPIAIJ,
2691                                        MatScale_MPIAIJ,
2692                                        MatShift_MPIAIJ,
2693                                        MatDiagonalSet_MPIAIJ,
2694                                        MatZeroRowsColumns_MPIAIJ,
2695                                 /*49*/ MatSetRandom_MPIAIJ,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2701                                        0,
2702                                        MatSetUnfactored_MPIAIJ,
2703                                        MatPermute_MPIAIJ,
2704                                        0,
2705                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2706                                        MatDestroy_MPIAIJ,
2707                                        MatView_MPIAIJ,
2708                                        0,
2709                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2710                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2711                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2716                                        MatGetRowMinAbs_MPIAIJ,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                 /*75*/ MatFDColoringApply_AIJ,
2722                                        MatSetFromOptions_MPIAIJ,
2723                                        0,
2724                                        0,
2725                                        MatFindZeroDiagonals_MPIAIJ,
2726                                 /*80*/ 0,
2727                                        0,
2728                                        0,
2729                                 /*83*/ MatLoad_MPIAIJ,
2730                                        MatIsSymmetric_MPIAIJ,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2736                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2737                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2738                                        MatPtAP_MPIAIJ_MPIAIJ,
2739                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2740                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2741                                        0,
2742                                        0,
2743                                        0,
2744                                        MatBindToCPU_MPIAIJ,
2745                                 /*99*/ 0,
2746                                        0,
2747                                        0,
2748                                        MatConjugate_MPIAIJ,
2749                                        0,
2750                                 /*104*/MatSetValuesRow_MPIAIJ,
2751                                        MatRealPart_MPIAIJ,
2752                                        MatImaginaryPart_MPIAIJ,
2753                                        0,
2754                                        0,
2755                                 /*109*/0,
2756                                        0,
2757                                        MatGetRowMin_MPIAIJ,
2758                                        0,
2759                                        MatMissingDiagonal_MPIAIJ,
2760                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2761                                        0,
2762                                        MatGetGhosts_MPIAIJ,
2763                                        0,
2764                                        0,
2765                                 /*119*/0,
2766                                        0,
2767                                        0,
2768                                        0,
2769                                        MatGetMultiProcBlock_MPIAIJ,
2770                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2771                                        MatGetColumnNorms_MPIAIJ,
2772                                        MatInvertBlockDiagonal_MPIAIJ,
2773                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2774                                        MatCreateSubMatricesMPI_MPIAIJ,
2775                                 /*129*/0,
2776                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2777                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2778                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2779                                        0,
2780                                 /*134*/0,
2781                                        0,
2782                                        MatRARt_MPIAIJ_MPIAIJ,
2783                                        0,
2784                                        0,
2785                                 /*139*/MatSetBlockSizes_MPIAIJ,
2786                                        0,
2787                                        0,
2788                                        MatFDColoringSetUp_MPIXAIJ,
2789                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2790                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2791 };
2792 
2793 /* ----------------------------------------------------------------------------------------*/
2794 
2795 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2796 {
2797   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2798   PetscErrorCode ierr;
2799 
2800   PetscFunctionBegin;
2801   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2802   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2807 {
2808   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2809   PetscErrorCode ierr;
2810 
2811   PetscFunctionBegin;
2812   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2813   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2818 {
2819   Mat_MPIAIJ     *b;
2820   PetscErrorCode ierr;
2821   PetscMPIInt    size;
2822 
2823   PetscFunctionBegin;
2824   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2825   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2826   b = (Mat_MPIAIJ*)B->data;
2827 
2828 #if defined(PETSC_USE_CTABLE)
2829   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2830 #else
2831   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2832 #endif
2833   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2834   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2835   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2836 
2837   /* Because B may have been resized, we simply destroy it and create a new one each time */
2838   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2839   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2840   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2841   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2842   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2843   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2844   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2845 
2846   if (!B->preallocated) {
2847     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2848     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2849     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2850     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2851     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2852   }
2853 
2854   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2855   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2856   B->preallocated  = PETSC_TRUE;
2857   B->was_assembled = PETSC_FALSE;
2858   B->assembled     = PETSC_FALSE;
2859   PetscFunctionReturn(0);
2860 }
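
/*
   A minimal preallocation sketch using the public interface that ends up in the
   routine above for MATMPIAIJ matrices (the per-row estimates 5 and 2 are
   illustrative only; M and N are the desired global sizes):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/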
2861 
2862 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2863 {
2864   Mat_MPIAIJ     *b;
2865   PetscErrorCode ierr;
2866 
2867   PetscFunctionBegin;
2868   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2869   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2870   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2871   b = (Mat_MPIAIJ*)B->data;
2872 
2873 #if defined(PETSC_USE_CTABLE)
2874   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2875 #else
2876   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2877 #endif
2878   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2879   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2880   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2881 
2882   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2883   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2884   B->preallocated  = PETSC_TRUE;
2885   B->was_assembled = PETSC_FALSE;
2886   B->assembled = PETSC_FALSE;
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2891 {
2892   Mat            mat;
2893   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2894   PetscErrorCode ierr;
2895 
2896   PetscFunctionBegin;
2897   *newmat = 0;
2898   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2899   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2900   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2901   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2902   a       = (Mat_MPIAIJ*)mat->data;
2903 
2904   mat->factortype   = matin->factortype;
2905   mat->assembled    = PETSC_TRUE;
2906   mat->insertmode   = NOT_SET_VALUES;
2907   mat->preallocated = PETSC_TRUE;
2908 
2909   a->size         = oldmat->size;
2910   a->rank         = oldmat->rank;
2911   a->donotstash   = oldmat->donotstash;
2912   a->roworiented  = oldmat->roworiented;
2913   a->rowindices   = 0;
2914   a->rowvalues    = 0;
2915   a->getrowactive = PETSC_FALSE;
2916 
2917   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2918   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2919 
2920   if (oldmat->colmap) {
2921 #if defined(PETSC_USE_CTABLE)
2922     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2923 #else
2924     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2925     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2926     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2927 #endif
2928   } else a->colmap = 0;
2929   if (oldmat->garray) {
2930     PetscInt len;
2931     len  = oldmat->B->cmap->n;
2932     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2933     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2934     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2935   } else a->garray = 0;
2936 
2937   /* It may happen that MatDuplicate is called with a non-assembled matrix.
2938      In fact, MatDuplicate only requires the matrix to be preallocated;
2939      this may happen, for example, inside DMCreateMatrix_Shell(). */
2940   if (oldmat->lvec) {
2941     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2942     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2943   }
2944   if (oldmat->Mvctx) {
2945     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2946     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2947   }
2948   if (oldmat->Mvctx_mpi1) {
2949     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2950     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2951   }
2952 
2953   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2954   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2955   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2956   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2957   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2958   *newmat = mat;
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2963 {
2964   PetscBool      isbinary, ishdf5;
2965   PetscErrorCode ierr;
2966 
2967   PetscFunctionBegin;
2968   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2969   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2970   /* force binary viewer to load .info file if it has not yet done so */
2971   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2972   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2973   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2974   if (isbinary) {
2975     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2976   } else if (ishdf5) {
2977 #if defined(PETSC_HAVE_HDF5)
2978     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2979 #else
2980     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2981 #endif
2982   } else {
2983     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2984   }
2985   PetscFunctionReturn(0);
2986 }
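
/*
   A minimal sketch of reaching MatLoad_MPIAIJ() through the public API; the
   file name "matrix.dat" is hypothetical and must contain a matrix in PETSc
   binary format:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/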
2987 
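/*
   The binary layout consumed by MatLoad_MPIAIJ_Binary() below is, in order:
   a 4-entry PETSC_INT header (MAT_FILE_CLASSID, M = global rows, N = global
   columns, total number of nonzeros; a negative nonzero count marks a special
   on-disk format), followed by the M row lengths, then the column indices of
   all nonzeros, and finally all nonzero values as PETSC_SCALAR. Only rank 0
   reads the file and ships each piece to its owning process.
*/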
2988 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2989 {
2990   PetscScalar    *vals,*svals;
2991   MPI_Comm       comm;
2992   PetscErrorCode ierr;
2993   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2994   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2995   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2996   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2997   PetscInt       cend,cstart,n,*rowners;
2998   int            fd;
2999   PetscInt       bs = newMat->rmap->bs;
3000 
3001   PetscFunctionBegin;
3002   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3003   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3004   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3005   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3006   if (!rank) {
3007     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3008     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3009     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3010     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
3010   }
3011 
3012   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
3013   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3014   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3015   if (bs < 0) bs = 1;
3016 
3017   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3018   M    = header[1]; N = header[2];
3019 
3020   /* If global sizes are set, check if they are consistent with that given in the file */
3021   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
3022   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
3023 
3024   /* determine ownership of all (block) rows */
3025   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3026   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3027   else m = newMat->rmap->n; /* Set by user */
3028 
3029   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
3030   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3031 
3032   /* First process needs enough room for process with most rows */
3033   if (!rank) {
3034     mmax = rowners[1];
3035     for (i=2; i<=size; i++) {
3036       mmax = PetscMax(mmax, rowners[i]);
3037     }
3038   } else mmax = -1;             /* unused, but compilers complain */
3039 
3040   rowners[0] = 0;
3041   for (i=2; i<=size; i++) {
3042     rowners[i] += rowners[i-1];
3043   }
3044   rstart = rowners[rank];
3045   rend   = rowners[rank+1];
3046 
3047   /* distribute row lengths to all processors */
3048   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3049   if (!rank) {
3050     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3051     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3052     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3053     for (j=0; j<m; j++) {
3054       procsnz[0] += ourlens[j];
3055     }
3056     for (i=1; i<size; i++) {
3057       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3058       /* calculate the number of nonzeros on each processor */
3059       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3060         procsnz[i] += rowlengths[j];
3061       }
3062       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3063     }
3064     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3065   } else {
3066     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3067   }
3068 
3069   if (!rank) {
3070     /* determine max buffer needed and allocate it */
3071     maxnz = 0;
3072     for (i=0; i<size; i++) {
3073       maxnz = PetscMax(maxnz,procsnz[i]);
3074     }
3075     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3076 
3077     /* read in my part of the matrix column indices  */
3078     nz   = procsnz[0];
3079     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3080     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3081 
3082     /* read in everyone else's parts and ship them off */
3083     for (i=1; i<size; i++) {
3084       nz   = procsnz[i];
3085       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3086       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3087     }
3088     ierr = PetscFree(cols);CHKERRQ(ierr);
3089   } else {
3090     /* determine buffer space needed for message */
3091     nz = 0;
3092     for (i=0; i<m; i++) {
3093       nz += ourlens[i];
3094     }
3095     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3096 
3097     /* receive message of column indices*/
3098     /* receive message of column indices */
3099   }
3100 
3101   /* determine column ownership if matrix is not square */
3102   if (N != M) {
3103     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3104     else n = newMat->cmap->n;
3105     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3106     cstart = cend - n;
3107   } else {
3108     cstart = rstart;
3109     cend   = rend;
3110     n      = cend - cstart;
3111   }
3112 
3113   /* loop over local rows, determining number of off diagonal entries */
3114   /* loop over local rows, determining the number of off-diagonal entries */
3115   jj   = 0;
3116   for (i=0; i<m; i++) {
3117     for (j=0; j<ourlens[i]; j++) {
3118       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3119       jj++;
3120     }
3121   }
3122 
3123   for (i=0; i<m; i++) {
3124     ourlens[i] -= offlens[i];
3125   }
3126   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3127 
3128   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3129 
3130   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3131 
3132   for (i=0; i<m; i++) {
3133     ourlens[i] += offlens[i];
3134   }
3135 
3136   if (!rank) {
3137     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3138 
3139     /* read in my part of the matrix numerical values  */
3140     nz   = procsnz[0];
3141     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3142 
3143     /* insert into matrix */
3144     jj      = rstart;
3145     smycols = mycols;
3146     svals   = vals;
3147     for (i=0; i<m; i++) {
3148       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3149       smycols += ourlens[i];
3150       svals   += ourlens[i];
3151       jj++;
3152     }
3153 
3154     /* read in other processors and ship out */
3155     for (i=1; i<size; i++) {
3156       nz   = procsnz[i];
3157       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3158       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3159     }
3160     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3161   } else {
3162     /* receive numeric values */
3163     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3164 
3165     /* receive message of values*/
3166     /* receive message of values */
3167 
3168     /* insert into matrix */
3169     jj      = rstart;
3170     smycols = mycols;
3171     svals   = vals;
3172     for (i=0; i<m; i++) {
3173       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3174       smycols += ourlens[i];
3175       svals   += ourlens[i];
3176       jj++;
3177     }
3178   }
3179   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3180   ierr = PetscFree(vals);CHKERRQ(ierr);
3181   ierr = PetscFree(mycols);CHKERRQ(ierr);
3182   ierr = PetscFree(rowners);CHKERRQ(ierr);
3183   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3184   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3185   PetscFunctionReturn(0);
3186 }
3187 
3188 /* Not scalable because of ISAllGather() unless getting all columns. */
3189 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3190 {
3191   PetscErrorCode ierr;
3192   IS             iscol_local;
3193   PetscBool      isstride;
3194   PetscMPIInt    lisstride=0,gisstride;
3195 
3196   PetscFunctionBegin;
3197   /* check if we are grabbing all columns */
3198   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3199 
3200   if (isstride) {
3201     PetscInt  start,len,mstart,mlen;
3202     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3203     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3204     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3205     if (mstart == start && mlen-mstart == len) lisstride = 1;
3206   }
3207 
3208   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3209   if (gisstride) {
3210     PetscInt N;
3211     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3212     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3213     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3214     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3215   } else {
3216     PetscInt cbs;
3217     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3218     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3219     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3220   }
3221 
3222   *isseq = iscol_local;
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 /*
3227  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid calling ISAllGather() and forming an iscol_local with the global size of iscol
3228  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3229 
3230  Input Parameters:
3231    mat - matrix
3232    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3233            i.e., mat->rstart <= isrow[i] < mat->rend
3234    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3235            i.e., mat->cstart <= iscol[i] < mat->cend
3236  Output Parameters:
3237    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3238    iscol_o - sequential column index set for retrieving mat->B
3239    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3240  */
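
/*
   Illustration (hypothetical, small sizes): suppose iscol globally selects the
   columns {2,5,9,11} (positions 0..3 within iscol) and, on this process, the
   off-diagonal block B holds copies of global columns 5 and 11.  Then iscol_o
   contains the local positions of columns 5 and 11 within B, and garray = {1,3},
   i.e. their positions within iscol.
*/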
3241 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3242 {
3243   PetscErrorCode ierr;
3244   Vec            x,cmap;
3245   const PetscInt *is_idx;
3246   PetscScalar    *xarray,*cmaparray;
3247   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3248   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3249   Mat            B=a->B;
3250   Vec            lvec=a->lvec,lcmap;
3251   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3252   MPI_Comm       comm;
3253   VecScatter     Mvctx=a->Mvctx;
3254 
3255   PetscFunctionBegin;
3256   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3257   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3258 
3259   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3260   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3261   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3262   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3263   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3264 
3265   /* Get start indices */
3266   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3267   isstart -= ncols;
3268   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3269 
3270   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3271   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3272   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3273   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3274   for (i=0; i<ncols; i++) {
3275     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3276     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3277     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3278   }
3279   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3280   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3281   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3282 
3283   /* Get iscol_d */
3284   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3285   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3286   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3287 
3288   /* Get isrow_d */
3289   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3290   rstart = mat->rmap->rstart;
3291   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3292   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3293   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3294   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3295 
3296   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3297   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3298   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3299 
3300   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3301   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3302   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3303 
3304   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3305 
3306   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3307   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3308 
3309   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3310   /* off-process column indices */
3311   count = 0;
3312   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3313   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3314 
3315   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3316   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3317   for (i=0; i<Bn; i++) {
3318     if (PetscRealPart(xarray[i]) > -1.0) {
3319       idx[count]     = i;                   /* local column index in off-diagonal part B */
3320       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3321       count++;
3322     }
3323   }
3324   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3325   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3326 
3327   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3328   /* cannot ensure iscol_o has same blocksize as iscol! */
3329 
3330   ierr = PetscFree(idx);CHKERRQ(ierr);
3331   *garray = cmap1;
3332 
3333   ierr = VecDestroy(&x);CHKERRQ(ierr);
3334   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3335   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3336   PetscFunctionReturn(0);
3337 }
3338 
3339 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3340 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3341 {
3342   PetscErrorCode ierr;
3343   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3344   Mat            M = NULL;
3345   MPI_Comm       comm;
3346   IS             iscol_d,isrow_d,iscol_o;
3347   Mat            Asub = NULL,Bsub = NULL;
3348   PetscInt       n;
3349 
3350   PetscFunctionBegin;
3351   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3352 
3353   if (call == MAT_REUSE_MATRIX) {
3354     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3355     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3356     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3357 
3358     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3359     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3360 
3361     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3362     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3363 
3364     /* Update diagonal and off-diagonal portions of submat */
3365     asub = (Mat_MPIAIJ*)(*submat)->data;
3366     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3367     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3368     if (n) {
3369       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3370     }
3371     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3372     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3373 
3374   } else { /* call == MAT_INITIAL_MATRIX */
3375     const PetscInt *garray;
3376     PetscInt        BsubN;
3377 
3378     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3379     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3380 
3381     /* Create local submatrices Asub and Bsub */
3382     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3383     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3384 
3385     /* Create submatrix M */
3386     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3387 
3388     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3389     asub = (Mat_MPIAIJ*)M->data;
3390 
3391     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3392     n = asub->B->cmap->N;
3393     if (BsubN > n) {
3394       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3395       const PetscInt *idx;
3396       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3397       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3398 
3399       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3400       j = 0;
3401       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3402       for (i=0; i<n; i++) {
3403         if (j >= BsubN) break;
3404         while (subgarray[i] > garray[j]) j++;
3405 
3406         if (subgarray[i] == garray[j]) {
3407           idx_new[i] = idx[j++];
3408         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3409       }
3410       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3411 
3412       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3413       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3414 
3415     } else if (BsubN < n) {
3416       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3417     }
3418 
3419     ierr = PetscFree(garray);CHKERRQ(ierr);
3420     *submat = M;
3421 
3422     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3423     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3424     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3425 
3426     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3427     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3428 
3429     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3430     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3431   }
3432   PetscFunctionReturn(0);
3433 }
3434 
3435 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3436 {
3437   PetscErrorCode ierr;
3438   IS             iscol_local=NULL,isrow_d;
3439   PetscInt       csize;
3440   PetscInt       n,i,j,start,end;
3441   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3442   MPI_Comm       comm;
3443 
3444   PetscFunctionBegin;
3445   /* If isrow has same processor distribution as mat,
3446      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3447   if (call == MAT_REUSE_MATRIX) {
3448     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3449     if (isrow_d) {
3450       sameRowDist  = PETSC_TRUE;
3451       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3452     } else {
3453       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3454       if (iscol_local) {
3455         sameRowDist  = PETSC_TRUE;
3456         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3457       }
3458     }
3459   } else {
3460     /* Check if isrow has same processor distribution as mat */
3461     sameDist[0] = PETSC_FALSE;
3462     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3463     if (!n) {
3464       sameDist[0] = PETSC_TRUE;
3465     } else {
3466       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3467       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3468       if (i >= start && j < end) {
3469         sameDist[0] = PETSC_TRUE;
3470       }
3471     }
3472 
3473     /* Check if iscol has same processor distribution as mat */
3474     sameDist[1] = PETSC_FALSE;
3475     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3476     if (!n) {
3477       sameDist[1] = PETSC_TRUE;
3478     } else {
3479       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3480       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3481       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3482     }
3483 
3484     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3485     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3486     sameRowDist = tsameDist[0];
3487   }
3488 
3489   if (sameRowDist) {
3490     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3491       /* isrow and iscol have same processor distribution as mat */
3492       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3493       PetscFunctionReturn(0);
3494     } else { /* sameRowDist */
3495       /* isrow has same processor distribution as mat */
3496       if (call == MAT_INITIAL_MATRIX) {
3497         PetscBool sorted;
3498         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3499         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3500         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3501         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3502 
3503         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3504         if (sorted) {
3505           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3506           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3507           PetscFunctionReturn(0);
3508         }
3509       } else { /* call == MAT_REUSE_MATRIX */
3510         IS    iscol_sub;
3511         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3512         if (iscol_sub) {
3513           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3514           PetscFunctionReturn(0);
3515         }
3516       }
3517     }
3518   }
3519 
3520   /* General case: iscol -> iscol_local which has global size of iscol */
3521   if (call == MAT_REUSE_MATRIX) {
3522     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3523     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3524   } else {
3525     if (!iscol_local) {
3526       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3527     }
3528   }
3529 
3530   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3531   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3532 
3533   if (call == MAT_INITIAL_MATRIX) {
3534     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3535     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3536   }
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 /*@C
3541      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3542          and "off-diagonal" part of the matrix in CSR format.
3543 
3544    Collective
3545 
3546    Input Parameters:
3547 +  comm - MPI communicator
3548 .  A - "diagonal" portion of matrix
3549 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3550 -  garray - global index of B columns
3551 
3552    Output Parameter:
3553 .   mat - the matrix, with input A as its local diagonal matrix
3554    Level: advanced
3555 
3556    Notes:
3557        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3558        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3559 
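   Example usage (a minimal sketch; Asub, Bsub and garray are assumed to have been
   built consistently, e.g. as in MatCreateSubMatrix_MPIAIJ_SameRowColDist() above):
.vb
   Mat C;
   ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Asub,Bsub,garray,&C);CHKERRQ(ierr);
.ve
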
3560 .seealso: MatCreateMPIAIJWithSplitArrays()
3561 @*/
3562 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3563 {
3564   PetscErrorCode ierr;
3565   Mat_MPIAIJ     *maij;
3566   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3567   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3568   PetscScalar    *oa=b->a;
3569   Mat            Bnew;
3570   PetscInt       m,n,N;
3571 
3572   PetscFunctionBegin;
3573   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3574   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3575   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3576   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3577   /* check below removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3578   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3579 
3580   /* Get global columns of mat */
3581   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3582 
3583   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3584   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3585   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3586   maij = (Mat_MPIAIJ*)(*mat)->data;
3587 
3588   (*mat)->preallocated = PETSC_TRUE;
3589 
3590   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3591   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3592 
3593   /* Set A as diagonal portion of *mat */
3594   maij->A = A;
3595 
3596   nz = oi[m];
3597   for (i=0; i<nz; i++) {
3598     col   = oj[i];
3599     oj[i] = garray[col];
3600   }
3601 
3602    /* Set Bnew as off-diagonal portion of *mat */
3603   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3604   bnew        = (Mat_SeqAIJ*)Bnew->data;
3605   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3606   maij->B     = Bnew;
3607 
3608   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3609 
3610   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3611   b->free_a       = PETSC_FALSE;
3612   b->free_ij      = PETSC_FALSE;
3613   ierr = MatDestroy(&B);CHKERRQ(ierr);
3614 
3615   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3616   bnew->free_a       = PETSC_TRUE;
3617   bnew->free_ij      = PETSC_TRUE;
3618 
3619   /* condense columns of maij->B */
3620   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3621   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3622   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3623   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3624   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3625   PetscFunctionReturn(0);
3626 }
3627 
3628 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3629 
3630 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3631 {
3632   PetscErrorCode ierr;
3633   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3634   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3635   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3636   Mat            M,Msub,B=a->B;
3637   MatScalar      *aa;
3638   Mat_SeqAIJ     *aij;
3639   PetscInt       *garray = a->garray,*colsub,Ncols;
3640   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3641   IS             iscol_sub,iscmap;
3642   const PetscInt *is_idx,*cmap;
3643   PetscBool      allcolumns=PETSC_FALSE;
3644   MPI_Comm       comm;
3645 
3646   PetscFunctionBegin;
3647   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3648 
3649   if (call == MAT_REUSE_MATRIX) {
3650     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3651     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3652     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3653 
3654     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3655     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3656 
3657     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3658     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3659 
3660     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3661 
3662   } else { /* call == MAT_INITIAL_MATRIX */
3663     PetscBool flg;
3664 
3665     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3666     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3667 
3668     /* (1) iscol -> nonscalable iscol_local */
3669     /* Check for special case: each processor gets entire matrix columns */
3670     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3671     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3672     if (allcolumns) {
3673       iscol_sub = iscol_local;
3674       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3675       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3676 
3677     } else {
3678       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3679       PetscInt *idx,*cmap1,k;
3680       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3681       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3682       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3683       count = 0;
3684       k     = 0;
3685       for (i=0; i<Ncols; i++) {
3686         j = is_idx[i];
3687         if (j >= cstart && j < cend) {
3688           /* diagonal part of mat */
3689           idx[count]     = j;
3690           cmap1[count++] = i; /* column index in submat */
3691         } else if (Bn) {
3692           /* off-diagonal part of mat */
3693           if (j == garray[k]) {
3694             idx[count]     = j;
3695             cmap1[count++] = i;  /* column index in submat */
3696           } else if (j > garray[k]) {
3697             while (j > garray[k] && k < Bn-1) k++;
3698             if (j == garray[k]) {
3699               idx[count]     = j;
3700               cmap1[count++] = i; /* column index in submat */
3701             }
3702           }
3703         }
3704       }
3705       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3706 
3707       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3708       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3709       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3710 
3711       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3712     }
3713 
3714     /* (3) Create sequential Msub */
3715     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3716   }
3717 
3718   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3719   aij  = (Mat_SeqAIJ*)(Msub)->data;
3720   ii   = aij->i;
3721   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3722 
3723   /*
3724       m - number of local rows
3725       Ncols - number of columns (same on all processors)
3726       rstart - first row in new global matrix generated
3727   */
3728   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3729 
3730   if (call == MAT_INITIAL_MATRIX) {
3731     /* (4) Create parallel newmat */
3732     PetscMPIInt    rank,size;
3733     PetscInt       csize;
3734 
3735     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3736     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3737 
3738     /*
3739         Determine the number of non-zeros in the diagonal and off-diagonal
3740         portions of the matrix in order to do correct preallocation
3741     */
3742 
3743     /* first get start and end of "diagonal" columns */
3744     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3745     if (csize == PETSC_DECIDE) {
3746       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3747       if (mglobal == Ncols) { /* square matrix */
3748         nlocal = m;
3749       } else {
3750         nlocal = Ncols/size + ((Ncols % size) > rank);
3751       }
3752     } else {
3753       nlocal = csize;
3754     }
3755     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3756     rstart = rend - nlocal;
3757     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3758 
3759     /* next, compute all the lengths */
3760     jj    = aij->j;
3761     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3762     olens = dlens + m;
3763     for (i=0; i<m; i++) {
3764       jend = ii[i+1] - ii[i];
3765       olen = 0;
3766       dlen = 0;
3767       for (j=0; j<jend; j++) {
3768         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3769         else dlen++;
3770         jj++;
3771       }
3772       olens[i] = olen;
3773       dlens[i] = dlen;
3774     }
3775 
3776     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3777     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3778 
3779     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3780     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3781     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3782     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3783     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3784     ierr = PetscFree(dlens);CHKERRQ(ierr);
3785 
3786   } else { /* call == MAT_REUSE_MATRIX */
3787     M    = *newmat;
3788     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3789     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3790     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3791     /*
3792          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3793        rather than the slower MatSetValues().
3794     */
3795     M->was_assembled = PETSC_TRUE;
3796     M->assembled     = PETSC_FALSE;
3797   }
3798 
3799   /* (5) Set values of Msub to *newmat */
3800   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3801   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3802 
3803   jj   = aij->j;
3804   aa   = aij->a;
3805   for (i=0; i<m; i++) {
3806     row = rstart + i;
3807     nz  = ii[i+1] - ii[i];
3808     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3809     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3810     jj += nz; aa += nz;
3811   }
3812   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3813 
3814   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3815   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3816 
3817   ierr = PetscFree(colsub);CHKERRQ(ierr);
3818 
3819   /* save Msub, iscol_sub and iscmap used in processor for next request */
3820   if (call ==  MAT_INITIAL_MATRIX) {
3821     *newmat = M;
3822     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3823     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3824 
3825     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3826     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3827 
3828     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3829     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3830 
3831     if (iscol_local) {
3832       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3833       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3834     }
3835   }
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 /*
3840     Not great since it makes two copies of the submatrix: first a SeqAIJ
3841   matrix locally, and then the end result by concatenating the local matrices.
3842   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3843 
3844   Note: This requires a sequential iscol with all indices.
3845 */
3846 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3847 {
3848   PetscErrorCode ierr;
3849   PetscMPIInt    rank,size;
3850   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3851   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3852   Mat            M,Mreuse;
3853   MatScalar      *aa,*vwork;
3854   MPI_Comm       comm;
3855   Mat_SeqAIJ     *aij;
3856   PetscBool      colflag,allcolumns=PETSC_FALSE;
3857 
3858   PetscFunctionBegin;
3859   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3860   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3861   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3862 
3863   /* Check for special case: each processor gets entire matrix columns */
3864   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3865   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3866   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3867 
3868   if (call ==  MAT_REUSE_MATRIX) {
3869     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3870     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3871     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3872   } else {
3873     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3874   }
3875 
3876   /*
3877       m - number of local rows
3878       n - number of columns (same on all processors)
3879       rstart - first row in new global matrix generated
3880   */
3881   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3882   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3883   if (call == MAT_INITIAL_MATRIX) {
3884     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3885     ii  = aij->i;
3886     jj  = aij->j;
3887 
3888     /*
3889         Determine the number of non-zeros in the diagonal and off-diagonal
3890         portions of the matrix in order to do correct preallocation
3891     */
3892 
3893     /* first get start and end of "diagonal" columns */
3894     if (csize == PETSC_DECIDE) {
3895       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3896       if (mglobal == n) { /* square matrix */
3897         nlocal = m;
3898       } else {
3899         nlocal = n/size + ((n % size) > rank);
3900       }
3901     } else {
3902       nlocal = csize;
3903     }
3904     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3905     rstart = rend - nlocal;
3906     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3907 
3908     /* next, compute all the lengths */
3909     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3910     olens = dlens + m;
3911     for (i=0; i<m; i++) {
3912       jend = ii[i+1] - ii[i];
3913       olen = 0;
3914       dlen = 0;
3915       for (j=0; j<jend; j++) {
3916         if (*jj < rstart || *jj >= rend) olen++;
3917         else dlen++;
3918         jj++;
3919       }
3920       olens[i] = olen;
3921       dlens[i] = dlen;
3922     }
3923     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3924     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3925     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3926     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3927     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3928     ierr = PetscFree(dlens);CHKERRQ(ierr);
3929   } else {
3930     PetscInt ml,nl;
3931 
3932     M    = *newmat;
3933     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3934     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3935     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3936     /*
3937          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3938        rather than the slower MatSetValues().
3939     */
3940     M->was_assembled = PETSC_TRUE;
3941     M->assembled     = PETSC_FALSE;
3942   }
3943   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3944   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3945   ii   = aij->i;
3946   jj   = aij->j;
3947   aa   = aij->a;
3948   for (i=0; i<m; i++) {
3949     row   = rstart + i;
3950     nz    = ii[i+1] - ii[i];
3951     cwork = jj;     jj += nz;
3952     vwork = aa;     aa += nz;
3953     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3954   }
3955 
3956   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3957   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3958   *newmat = M;
3959 
3960   /* save submatrix used in processor for next request */
3961   if (call ==  MAT_INITIAL_MATRIX) {
3962     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3963     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3964   }
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3969 {
3970   PetscInt       m,cstart, cend,j,nnz,i,d;
3971   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3972   const PetscInt *JJ;
3973   PetscErrorCode ierr;
3974   PetscBool      nooffprocentries;
3975 
3976   PetscFunctionBegin;
3977   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3978 
3979   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3980   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3981   m      = B->rmap->n;
3982   cstart = B->cmap->rstart;
3983   cend   = B->cmap->rend;
3984   rstart = B->rmap->rstart;
3985 
3986   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3987 
3988 #if defined(PETSC_USE_DEBUG)
3989   for (i=0; i<m; i++) {
3990     nnz = Ii[i+1]- Ii[i];
3991     JJ  = J + Ii[i];
3992     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3993     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3994     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3995   }
3996 #endif
3997 
3998   for (i=0; i<m; i++) {
3999     nnz     = Ii[i+1]- Ii[i];
4000     JJ      = J + Ii[i];
4001     nnz_max = PetscMax(nnz_max,nnz);
4002     d       = 0;
4003     for (j=0; j<nnz; j++) {
4004       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4005     }
4006     d_nnz[i] = d;
4007     o_nnz[i] = nnz - d;
4008   }
4009   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4010   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4011 
4012   for (i=0; i<m; i++) {
4013     ii   = i + rstart;
4014     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4015   }
4016   nooffprocentries    = B->nooffprocentries;
4017   B->nooffprocentries = PETSC_TRUE;
4018   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4019   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4020   B->nooffprocentries = nooffprocentries;
4021 
4022   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4023   PetscFunctionReturn(0);
4024 }
4025 
4026 /*@
4027    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4028    (the default parallel PETSc format).
4029 
4030    Collective
4031 
4032    Input Parameters:
4033 +  B - the matrix
4034 .  i - the indices into j for the start of each local row (starts with zero)
4035 .  j - the column indices for each local row (starts with zero)
4036 -  v - optional values in the matrix
4037 
4038    Level: developer
4039 
4040    Notes:
4041        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4042      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4043      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4044 
4045        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4046 
4047        The format used for the sparse matrix input is equivalent to a
4048     row-major ordering, i.e., for the following matrix the input data expected is
4049     as shown below.
4050 
4051 $        1 0 0
4052 $        2 0 3     P0
4053 $       -------
4054 $        4 5 6     P1
4055 $
4056 $     Process0 [P0]: rows_owned=[0,1]
4057 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4058 $        j =  {0,0,2}  [size = 3]
4059 $        v =  {1,2,3}  [size = 3]
4060 $
4061 $     Process1 [P1]: rows_owned=[2]
4062 $        i =  {0,3}    [size = nrow+1  = 1+1]
4063 $        j =  {0,1,2}  [size = 3]
4064 $        v =  {4,5,6}  [size = 3]
4065 
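       A minimal calling sketch for process 0 of the example above (variable declarations and
    error checking with CHKERRQ() are omitted; the three arrays are the P0 data listed above,
    and P0 owns 2 of the 3 global rows):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
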
4066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4067           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4068 @*/
4069 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4070 {
4071   PetscErrorCode ierr;
4072 
4073   PetscFunctionBegin;
4074   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4075   PetscFunctionReturn(0);
4076 }
4077 
4078 /*@C
4079    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4080    (the default parallel PETSc format).  For good matrix assembly performance
4081    the user should preallocate the matrix storage by setting the parameters
4082    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4083    performance can be increased by more than a factor of 50.
4084 
4085    Collective
4086 
4087    Input Parameters:
4088 +  B - the matrix
4089 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4090            (same value is used for all local rows)
4091 .  d_nnz - array containing the number of nonzeros in the various rows of the
4092            DIAGONAL portion of the local submatrix (possibly different for each row)
4093            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4094            The size of this array is equal to the number of local rows, i.e 'm'.
4095            For matrices that will be factored, you must leave room for (and set)
4096            the diagonal entry even if it is zero.
4097 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4098            submatrix (same value is used for all local rows).
4099 -  o_nnz - array containing the number of nonzeros in the various rows of the
4100            OFF-DIAGONAL portion of the local submatrix (possibly different for
4101            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4102            structure. The size of this array is equal to the number
4103            of local rows, i.e 'm'.
4104 
4105    If the *_nnz parameter is given then the *_nz parameter is ignored
4106 
4107    The AIJ format (also called the Yale sparse matrix format or
4108    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4109    storage.  The stored row and column indices begin with zero.
4110    See Users-Manual: ch_mat for details.
4111 
4112    The parallel matrix is partitioned such that the first m0 rows belong to
4113    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4114    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4115 
4116    The DIAGONAL portion of the local submatrix of a processor can be defined
4117    as the submatrix obtained by extracting the part corresponding to
4118    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4119    first row that belongs to the processor, r2 is the last row belonging to
4120    this processor, and c1-c2 is the range of indices of the local part of a
4121    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4122    common case of a square matrix, the row and column ranges are the same and
4123    the DIAGONAL part is also square. The remaining portion of the local
4124    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4125 
4126    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4127 
4128    You can call MatGetInfo() to get information on how effective the preallocation was;
4129    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4130    You can also run with the option -info and look for messages with the string
4131    malloc in them to see if additional memory allocation was needed.
4132 
4133    Example usage:
4134 
4135    Consider the following 8x8 matrix with 34 non-zero values, that is
4136    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4137    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4138    as follows:
4139 
4140 .vb
4141             1  2  0  |  0  3  0  |  0  4
4142     Proc0   0  5  6  |  7  0  0  |  8  0
4143             9  0 10  | 11  0  0  | 12  0
4144     -------------------------------------
4145            13  0 14  | 15 16 17  |  0  0
4146     Proc1   0 18  0  | 19 20 21  |  0  0
4147             0  0  0  | 22 23  0  | 24  0
4148     -------------------------------------
4149     Proc2  25 26 27  |  0  0 28  | 29  0
4150            30  0  0  | 31 32 33  |  0 34
4151 .ve
4152 
4153    This can be represented as a collection of submatrices as:
4154 
4155 .vb
4156       A B C
4157       D E F
4158       G H I
4159 .ve
4160 
4161    Where the submatrices A,B,C are owned by proc0, D,E,F are
4162    owned by proc1, G,H,I are owned by proc2.
4163 
4164    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4165    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4166    The 'M','N' parameters are 8,8, and have the same values on all procs.
4167 
4168    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4169    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4170    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4171    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4172    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4173    matrix, ans [DF] as another SeqAIJ matrix.
4174 
4175    When d_nz, o_nz parameters are specified, d_nz storage elements are
4176    allocated for every row of the local diagonal submatrix, and o_nz
4177    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4178    One way to choose d_nz and o_nz is to use the max nonzerors per local
4179    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4180    In this case, the values of d_nz,o_nz are:
4181 .vb
4182      proc0 : dnz = 2, o_nz = 2
4183      proc1 : dnz = 3, o_nz = 2
4184      proc2 : dnz = 1, o_nz = 4
4185 .ve
4186    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4187    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4188    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4189    34 values.
4190 
4191    When d_nnz, o_nnz parameters are specified, the storage is specified
4192    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4193    In the above case the values for d_nnz,o_nnz are:
4194 .vb
4195      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4196      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4197      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4198 .ve
4199    Here the space allocated is sum of all the above values i.e 34, and
4200    hence pre-allocation is perfect.
4201 
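       As a sketch (declarations and error checking omitted), the per-row counts above would be
    used on proc1 as follows; the other processes make the analogous calls with their own
    sizes and count arrays:

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,3,3,8,8);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
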
4202    Level: intermediate
4203 
4204 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4205           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4206 @*/
4207 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4208 {
4209   PetscErrorCode ierr;
4210 
4211   PetscFunctionBegin;
4212   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4213   PetscValidType(B,1);
4214   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4215   PetscFunctionReturn(0);
4216 }
4217 
4218 /*@
4219      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in
4220          standard CSR format.
4221 
4222    Collective
4223 
4224    Input Parameters:
4225 +  comm - MPI communicator
4226 .  m - number of local rows (Cannot be PETSC_DECIDE)
4227 .  n - This value should be the same as the local size used in creating the
4228        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4229        calculated if N is given) For square matrices n is almost always m.
4230 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4231 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4232 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4233 .   j - column indices
4234 -   a - matrix values
4235 
4236    Output Parameter:
4237 .   mat - the matrix
4238 
4239    Level: intermediate
4240 
4241    Notes:
4242        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4243      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4244      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4245 
4246        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4247 
4248        The format used for the sparse matrix input is equivalent to a
4249     row-major ordering, i.e., for the following matrix the input data expected is
4250     as shown below.
4251 
4252        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4253 
4254 $        1 0 0
4255 $        2 0 3     P0
4256 $       -------
4257 $        4 5 6     P1
4258 $
4259 $     Process0 [P0]: rows_owned=[0,1]
4260 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4261 $        j =  {0,0,2}  [size = 3]
4262 $        v =  {1,2,3}  [size = 3]
4263 $
4264 $     Process1 [P1]: rows_owned=[2]
4265 $        i =  {0,3}    [size = nrow+1  = 1+1]
4266 $        j =  {0,1,2}  [size = 3]
4267 $        v =  {4,5,6}  [size = 3]
4268 
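       For instance, process 1 of the example above would make the following call (declarations
    and error checking omitted); process 0 calls it at the same time with its own local arrays
    and local row count:

.vb
     PetscInt    i[] = {0,3};
     PetscInt    j[] = {0,1,2};
     PetscScalar v[] = {4.0,5.0,6.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i,j,v,&mat);
.ve
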
4269 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4270           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4271 @*/
4272 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4273 {
4274   PetscErrorCode ierr;
4275 
4276   PetscFunctionBegin;
4277   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4278   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4279   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4280   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4281   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4282   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4283   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4284   PetscFunctionReturn(0);
4285 }
4286 
4287 /*@
4288      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in
4289          standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4290 
4291    Collective
4292 
4293    Input Parameters:
4294 +  mat - the matrix
4295 .  m - number of local rows (Cannot be PETSC_DECIDE)
4296 .  n - This value should be the same as the local size used in creating the
4297        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4298        calculated if N is given) For square matrices n is almost always m.
4299 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4300 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4301 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4302 .  J - column indices
4303 -  v - matrix values
4304 
4305    Level: intermediate
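   Notes:
       A sketch of typical use (v_new here is a placeholder name for an array holding new values
    with exactly the same layout as the v the matrix was originally created from):

.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,Ii,J,v,&mat);
     MatUpdateMPIAIJWithArrays(mat,m,n,M,N,Ii,J,v_new);
.ve
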
4306 
4307 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4308           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4309 @*/
4310 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4311 {
4312   PetscErrorCode ierr;
4313   PetscInt       cstart,nnz,i,j;
4314   PetscInt       *ld;
4315   PetscBool      nooffprocentries;
4316   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4317   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4318   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4319   const PetscInt *Adi = Ad->i;
4320   PetscInt       ldi,Iii,md;
4321 
4322   PetscFunctionBegin;
4323   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4324   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4325   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4326   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4327 
4328   cstart = mat->cmap->rstart;
4329   if (!Aij->ld) {
4330     /* count number of entries below block diagonal */
4331     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4332     Aij->ld = ld;
4333     for (i=0; i<m; i++) {
4334       nnz  = Ii[i+1]- Ii[i];
4335       j     = 0;
4336       while (j < nnz && J[j] < cstart) {j++;}
4337       J    += nnz;
4338       ld[i] = j;
4339     }
4340   } else {
4341     ld = Aij->ld;
4342   }
4343 
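  /* Each input row of v is assumed ordered by global column: first the ld[i] entries whose global
     column lies before the diagonal block, then the md entries of the diagonal block, then the
     remaining off-diagonal entries; each piece is copied into the matching SeqAIJ value arrays
     of the diagonal block (ad) and the off-diagonal block (ao). */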
4344   for (i=0; i<m; i++) {
4345     nnz  = Ii[i+1]- Ii[i];
4346     Iii  = Ii[i];
4347     ldi  = ld[i];
4348     md   = Adi[i+1]-Adi[i];
4349     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4350     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4351     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4352     ad  += md;
4353     ao  += nnz - md;
4354   }
4355   nooffprocentries      = mat->nooffprocentries;
4356   mat->nooffprocentries = PETSC_TRUE;
4357   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4358   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4359   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4360   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4361   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4362   mat->nooffprocentries = nooffprocentries;
4363   PetscFunctionReturn(0);
4364 }
4365 
4366 /*@C
4367    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4368    (the default parallel PETSc format).  For good matrix assembly performance
4369    the user should preallocate the matrix storage by setting the parameters
4370    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4371    performance can be increased by more than a factor of 50.
4372 
4373    Collective
4374 
4375    Input Parameters:
4376 +  comm - MPI communicator
4377 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4378            This value should be the same as the local size used in creating the
4379            y vector for the matrix-vector product y = Ax.
4380 .  n - This value should be the same as the local size used in creating the
4381        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4382        calculated if N is given) For square matrices n is almost always m.
4383 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4384 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4385 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4386            (same value is used for all local rows)
4387 .  d_nnz - array containing the number of nonzeros in the various rows of the
4388            DIAGONAL portion of the local submatrix (possibly different for each row)
4389            or NULL, if d_nz is used to specify the nonzero structure.
4390            The size of this array is equal to the number of local rows, i.e 'm'.
4391 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4392            submatrix (same value is used for all local rows).
4393 -  o_nnz - array containing the number of nonzeros in the various rows of the
4394            OFF-DIAGONAL portion of the local submatrix (possibly different for
4395            each row) or NULL, if o_nz is used to specify the nonzero
4396            structure. The size of this array is equal to the number
4397            of local rows, i.e 'm'.
4398 
4399    Output Parameter:
4400 .  A - the matrix
4401 
4402    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4403    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4404    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4405 
4406    Notes:
4407    If the *_nnz parameter is given then the *_nz parameter is ignored
4408 
4409    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4410    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4411    storage requirements for this matrix.
4412 
4413    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4414    processor than it must be used on all processors that share the object for
4415    that argument.
4416 
4417    The user MUST specify either the local or global matrix dimensions
4418    (possibly both).
4419 
4420    The parallel matrix is partitioned across processors such that the
4421    first m0 rows belong to process 0, the next m1 rows belong to
4422    process 1, the next m2 rows belong to process 2 etc.. where
4423    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4424    values corresponding to an [m x N] submatrix.
4425 
4426    The columns are logically partitioned with the n0 columns belonging
4427    to 0th partition, the next n1 columns belonging to the next
4428    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4429 
4430    The DIAGONAL portion of the local submatrix on any given processor
4431    is the submatrix corresponding to the rows and columns m,n
4432    corresponding to the given processor. i.e diagonal matrix on
4433    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4434    etc. The remaining portion of the local submatrix [m x (N-n)]
4435    constitute the OFF-DIAGONAL portion. The example below better
4436    illustrates this concept.
4437 
4438    For a square global matrix we define each processor's diagonal portion
4439    to be its local rows and the corresponding columns (a square submatrix);
4440    each processor's off-diagonal portion encompasses the remainder of the
4441    local matrix (a rectangular submatrix).
4442 
4443    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4444 
4445    When calling this routine with a single process communicator, a matrix of
4446    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4447    type of communicator, use the construction mechanism
4448 .vb
4449      MatCreate(...,&A);
4450      MatSetType(A,MATMPIAIJ);
4451      MatSetSizes(A, m,n,M,N);
4452      MatMPIAIJSetPreallocation(A,...);
4453 .ve
4456 
4457    By default, this format uses inodes (identical nodes) when possible.
4458    We search for consecutive rows with the same nonzero structure, thereby
4459    reusing matrix information to achieve increased efficiency.
4460 
4461    Options Database Keys:
4462 +  -mat_no_inode  - Do not use inodes
4463 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4464 
4465 
4466 
4467    Example usage:
4468 
4469    Consider the following 8x8 matrix with 34 non-zero values, that is
4470    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4471    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4472    as follows
4473 
4474 .vb
4475             1  2  0  |  0  3  0  |  0  4
4476     Proc0   0  5  6  |  7  0  0  |  8  0
4477             9  0 10  | 11  0  0  | 12  0
4478     -------------------------------------
4479            13  0 14  | 15 16 17  |  0  0
4480     Proc1   0 18  0  | 19 20 21  |  0  0
4481             0  0  0  | 22 23  0  | 24  0
4482     -------------------------------------
4483     Proc2  25 26 27  |  0  0 28  | 29  0
4484            30  0  0  | 31 32 33  |  0 34
4485 .ve
4486 
4487    This can be represented as a collection of submatrices as
4488 
4489 .vb
4490       A B C
4491       D E F
4492       G H I
4493 .ve
4494 
4495    Where the submatrices A,B,C are owned by proc0, D,E,F are
4496    owned by proc1, G,H,I are owned by proc2.
4497 
4498    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4499    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4500    The 'M','N' parameters are 8,8, and have the same values on all procs.
4501 
4502    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4503    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4504    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4505    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4506    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4507    matrix, and [DF] as another SeqAIJ matrix.
4508 
4509    When d_nz, o_nz parameters are specified, d_nz storage elements are
4510    allocated for every row of the local diagonal submatrix, and o_nz
4511    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4512    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4513    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4514    In this case, the values of d_nz,o_nz are
4515 .vb
4516      proc0 : dnz = 2, o_nz = 2
4517      proc1 : dnz = 3, o_nz = 2
4518      proc2 : dnz = 1, o_nz = 4
4519 .ve
4520    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4521    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4522    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4523    34 values.
4524 
4525    When d_nnz, o_nnz parameters are specified, the storage is specified
4526    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4527    In the above case the values for d_nnz,o_nnz are
4528 .vb
4529      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4530      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4531      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4532 .ve
4533    Here the space allocated is sum of all the above values i.e 34, and
4534    hence pre-allocation is perfect.
4535 
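       As a sketch (declarations and error checking omitted), proc2 of the example above could
    create its part of the matrix with the exact per-row counts from the last table by calling,
    collectively with the other processes (each passing its own m, n, d_nnz and o_nnz):

.vb
     PetscInt d_nnz[] = {1,1}, o_nnz[] = {4,4};

     MatCreateAIJ(PETSC_COMM_WORLD,2,2,8,8,0,d_nnz,0,o_nnz,&A);
.ve
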
4536    Level: intermediate
4537 
4538 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4539           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4540 @*/
4541 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4542 {
4543   PetscErrorCode ierr;
4544   PetscMPIInt    size;
4545 
4546   PetscFunctionBegin;
4547   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4548   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4549   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4550   if (size > 1) {
4551     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4552     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4553   } else {
4554     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4555     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4556   }
4557   PetscFunctionReturn(0);
4558 }
4559 
4560 /*@C
4561   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4562 
4563   Not collective
4564 
4565   Input Parameter:
4566 . A - The MPIAIJ matrix
4567 
4568   Output Parameters:
4569 + Ad - The local diagonal block as a SeqAIJ matrix
4570 . Ao - The local off-diagonal block as a SeqAIJ matrix
4571 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4572 
4573   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4574   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4575   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4576   local column numbers to global column numbers in the original matrix.
4577 
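  A small usage sketch (error checking omitted); after the call, the global column number of
  local column c of Ao is colmap[c]:

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve
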
4578   Level: intermediate
4579 
4580 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4581 @*/
4582 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4583 {
4584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4585   PetscBool      flg;
4586   PetscErrorCode ierr;
4587 
4588   PetscFunctionBegin;
4589   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4590   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4591   if (Ad)     *Ad     = a->A;
4592   if (Ao)     *Ao     = a->B;
4593   if (colmap) *colmap = a->garray;
4594   PetscFunctionReturn(0);
4595 }
4596 
4597 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4598 {
4599   PetscErrorCode ierr;
4600   PetscInt       m,N,i,rstart,nnz,Ii;
4601   PetscInt       *indx;
4602   PetscScalar    *values;
4603 
4604   PetscFunctionBegin;
4605   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4606   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4607     PetscInt       *dnz,*onz,sum,bs,cbs;
4608 
4609     if (n == PETSC_DECIDE) {
4610       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4611     }
4612     /* Check sum(n) = N */
4613     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4614     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4615 
4616     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4617     rstart -= m;
4618 
4619     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4620     for (i=0; i<m; i++) {
4621       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4622       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4623       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4624     }
4625 
4626     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4627     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4628     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4629     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4630     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4631     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4632     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4633     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4634   }
4635 
4636   /* numeric phase */
4637   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4638   for (i=0; i<m; i++) {
4639     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4640     Ii   = i + rstart;
4641     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4642     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4643   }
4644   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4645   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4646   PetscFunctionReturn(0);
4647 }
4648 
4649 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4650 {
4651   PetscErrorCode    ierr;
4652   PetscMPIInt       rank;
4653   PetscInt          m,N,i,rstart,nnz;
4654   size_t            len;
4655   const PetscInt    *indx;
4656   PetscViewer       out;
4657   char              *name;
4658   Mat               B;
4659   const PetscScalar *values;
4660 
4661   PetscFunctionBegin;
4662   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4663   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4664   /* Should this be the type of the diagonal block of A? */
4665   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4666   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4667   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4668   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4669   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4670   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4671   for (i=0; i<m; i++) {
4672     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4673     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4674     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4675   }
4676   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4677   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4678 
4679   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4680   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4681   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4682   sprintf(name,"%s.%d",outfile,rank);
4683   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4684   ierr = PetscFree(name);CHKERRQ(ierr);
4685   ierr = MatView(B,out);CHKERRQ(ierr);
4686   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4687   ierr = MatDestroy(&B);CHKERRQ(ierr);
4688   PetscFunctionReturn(0);
4689 }
4690 
4691 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4692 {
4693   PetscErrorCode      ierr;
4694   Mat_Merge_SeqsToMPI *merge;
4695   PetscContainer      container;
4696 
4697   PetscFunctionBegin;
4698   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4699   if (container) {
4700     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4701     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4702     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4703     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4704     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4705     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4706     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4707     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4708     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4709     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4710     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4711     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4712     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4713     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4714     ierr = PetscFree(merge);CHKERRQ(ierr);
4715     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4716   }
4717   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4718   PetscFunctionReturn(0);
4719 }
4720 
4721 #include <../src/mat/utils/freespace.h>
4722 #include <petscbt.h>
4723 
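/*
   Usage sketch (inferred from the symbolic/numeric split below, not a prescription): each process
   builds a sequential AIJ matrix seqmat of the global dimensions holding its contributions, calls
   MatCreateMPIAIJSumSeqAIJSymbolic() once to create the parallel matrix, and then calls
   MatCreateMPIAIJSumSeqAIJNumeric() each time the numerical values of seqmat change; the
   contributions from all processes are summed into the parallel matrix.
*/
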
4724 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4725 {
4726   PetscErrorCode      ierr;
4727   MPI_Comm            comm;
4728   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4729   PetscMPIInt         size,rank,taga,*len_s;
4730   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4731   PetscInt            proc,m;
4732   PetscInt            **buf_ri,**buf_rj;
4733   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4734   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4735   MPI_Request         *s_waits,*r_waits;
4736   MPI_Status          *status;
4737   MatScalar           *aa=a->a;
4738   MatScalar           **abuf_r,*ba_i;
4739   Mat_Merge_SeqsToMPI *merge;
4740   PetscContainer      container;
4741 
4742   PetscFunctionBegin;
4743   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4744   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4745 
4746   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4747   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4748 
4749   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4750   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4751 
4752   bi     = merge->bi;
4753   bj     = merge->bj;
4754   buf_ri = merge->buf_ri;
4755   buf_rj = merge->buf_rj;
4756 
4757   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4758   owners = merge->rowmap->range;
4759   len_s  = merge->len_s;
4760 
4761   /* send and recv matrix values */
4762   /*-----------------------------*/
4763   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4764   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4765 
4766   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4767   for (proc=0,k=0; proc<size; proc++) {
4768     if (!len_s[proc]) continue;
4769     i    = owners[proc];
4770     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4771     k++;
4772   }
4773 
4774   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4775   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4776   ierr = PetscFree(status);CHKERRQ(ierr);
4777 
4778   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4779   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4780 
4781   /* insert mat values of mpimat */
4782   /*----------------------------*/
4783   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4784   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4785 
4786   for (k=0; k<merge->nrecv; k++) {
4787     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4788     nrows       = *(buf_ri_k[k]);
4789     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4790     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4791   }
4792 
4793   /* set values of ba */
4794   m = merge->rowmap->n;
4795   for (i=0; i<m; i++) {
4796     arow = owners[rank] + i;
4797     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4798     bnzi = bi[i+1] - bi[i];
4799     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4800 
4801     /* add local non-zero vals of this proc's seqmat into ba */
4802     anzi   = ai[arow+1] - ai[arow];
4803     aj     = a->j + ai[arow];
4804     aa     = a->a + ai[arow];
4805     nextaj = 0;
4806     for (j=0; nextaj<anzi; j++) {
4807       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4808         ba_i[j] += aa[nextaj++];
4809       }
4810     }
4811 
4812     /* add received vals into ba */
4813     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4814       /* i-th row */
4815       if (i == *nextrow[k]) {
4816         anzi   = *(nextai[k]+1) - *nextai[k];
4817         aj     = buf_rj[k] + *(nextai[k]);
4818         aa     = abuf_r[k] + *(nextai[k]);
4819         nextaj = 0;
4820         for (j=0; nextaj<anzi; j++) {
4821           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4822             ba_i[j] += aa[nextaj++];
4823           }
4824         }
4825         nextrow[k]++; nextai[k]++;
4826       }
4827     }
4828     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4829   }
4830   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4831   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4832 
4833   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4834   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4835   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4836   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4837   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4838   PetscFunctionReturn(0);
4839 }
4840 
4841 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4842 {
4843   PetscErrorCode      ierr;
4844   Mat                 B_mpi;
4845   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4846   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4847   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4848   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4849   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4850   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4851   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4852   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4853   MPI_Status          *status;
4854   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4855   PetscBT             lnkbt;
4856   Mat_Merge_SeqsToMPI *merge;
4857   PetscContainer      container;
4858 
4859   PetscFunctionBegin;
4860   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4861 
4862   /* make sure it is a PETSc comm */
4863   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4864   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4865   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4866 
4867   ierr = PetscNew(&merge);CHKERRQ(ierr);
4868   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4869 
4870   /* determine row ownership */
4871   /*---------------------------------------------------------*/
4872   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4873   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4874   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4875   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4876   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4877   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4878   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4879 
4880   m      = merge->rowmap->n;
4881   owners = merge->rowmap->range;
4882 
4883   /* determine the number of messages to send, their lengths */
4884   /*---------------------------------------------------------*/
4885   len_s = merge->len_s;
4886 
4887   len          = 0; /* length of buf_si[] */
4888   merge->nsend = 0;
4889   for (proc=0; proc<size; proc++) {
4890     len_si[proc] = 0;
4891     if (proc == rank) {
4892       len_s[proc] = 0;
4893     } else {
4894       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4895       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4896     }
4897     if (len_s[proc]) {
4898       merge->nsend++;
4899       nrows = 0;
4900       for (i=owners[proc]; i<owners[proc+1]; i++) {
4901         if (ai[i+1] > ai[i]) nrows++;
4902       }
4903       len_si[proc] = 2*(nrows+1);
4904       len         += len_si[proc];
4905     }
4906   }
4907 
4908   /* determine the number and length of messages to receive for ij-structure */
4909   /*-------------------------------------------------------------------------*/
4910   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4911   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4912 
4913   /* post the Irecv of j-structure */
4914   /*-------------------------------*/
4915   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4916   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4917 
4918   /* post the Isend of j-structure */
4919   /*--------------------------------*/
4920   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4921 
4922   for (proc=0, k=0; proc<size; proc++) {
4923     if (!len_s[proc]) continue;
4924     i    = owners[proc];
4925     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4926     k++;
4927   }
4928 
4929   /* receives and sends of j-structure are complete */
4930   /*------------------------------------------------*/
4931   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4932   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4933 
4934   /* send and recv i-structure */
4935   /*---------------------------*/
4936   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4937   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4938 
4939   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4940   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4941   for (proc=0,k=0; proc<size; proc++) {
4942     if (!len_s[proc]) continue;
4943     /* form outgoing message for i-structure:
4944          buf_si[0]:                 nrows to be sent
4945                [1:nrows]:           row index (global)
4946                [nrows+1:2*nrows+1]: i-structure index
4947     */
4948     /*-------------------------------------------*/
4949     nrows       = len_si[proc]/2 - 1;
4950     buf_si_i    = buf_si + nrows+1;
4951     buf_si[0]   = nrows;
4952     buf_si_i[0] = 0;
4953     nrows       = 0;
4954     for (i=owners[proc]; i<owners[proc+1]; i++) {
4955       anzi = ai[i+1] - ai[i];
4956       if (anzi) {
4957         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4958         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4959         nrows++;
4960       }
4961     }
4962     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4963     k++;
4964     buf_si += len_si[proc];
4965   }
4966 
4967   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4968   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4969 
4970   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4971   for (i=0; i<merge->nrecv; i++) {
4972     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4973   }
4974 
4975   ierr = PetscFree(len_si);CHKERRQ(ierr);
4976   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4977   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4978   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4979   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4980   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4981   ierr = PetscFree(status);CHKERRQ(ierr);
4982 
4983   /* compute a local seq matrix in each processor */
4984   /*----------------------------------------------*/
4985   /* allocate bi array and free space for accumulating nonzero column info */
4986   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4987   bi[0] = 0;
4988 
4989   /* create and initialize a linked list */
4990   nlnk = N+1;
4991   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4992 
4993   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4994   len  = ai[owners[rank+1]] - ai[owners[rank]];
4995   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4996 
4997   current_space = free_space;
4998 
4999   /* determine symbolic info for each local row */
5000   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
5001 
5002   for (k=0; k<merge->nrecv; k++) {
5003     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5004     nrows       = *buf_ri_k[k];
5005     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5006     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
5007   }
5008 
5009   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5010   len  = 0;
5011   for (i=0; i<m; i++) {
5012     bnzi = 0;
5013     /* add local non-zero cols of this proc's seqmat into lnk */
5014     arow  = owners[rank] + i;
5015     anzi  = ai[arow+1] - ai[arow];
5016     aj    = a->j + ai[arow];
5017     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5018     bnzi += nlnk;
5019     /* add received col data into lnk */
5020     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5021       if (i == *nextrow[k]) { /* i-th row */
5022         anzi  = *(nextai[k]+1) - *nextai[k];
5023         aj    = buf_rj[k] + *nextai[k];
5024         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5025         bnzi += nlnk;
5026         nextrow[k]++; nextai[k]++;
5027       }
5028     }
5029     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5030 
5031     /* if free space is not available, make more free space */
5032     if (current_space->local_remaining<bnzi) {
5033       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5034       nspacedouble++;
5035     }
5036     /* copy data into free space, then initialize lnk */
5037     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5038     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5039 
5040     current_space->array           += bnzi;
5041     current_space->local_used      += bnzi;
5042     current_space->local_remaining -= bnzi;
5043 
5044     bi[i+1] = bi[i] + bnzi;
5045   }
5046 
5047   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5048 
5049   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5050   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5051   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5052 
5053   /* create symbolic parallel matrix B_mpi */
5054   /*---------------------------------------*/
5055   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5056   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5057   if (n==PETSC_DECIDE) {
5058     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5059   } else {
5060     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5061   }
5062   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5063   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5064   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5065   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5066   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5067 
5068   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5069   B_mpi->assembled    = PETSC_FALSE;
5070   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5071   merge->bi           = bi;
5072   merge->bj           = bj;
5073   merge->buf_ri       = buf_ri;
5074   merge->buf_rj       = buf_rj;
5075   merge->coi          = NULL;
5076   merge->coj          = NULL;
5077   merge->owners_co    = NULL;
5078 
5079   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5080 
5081   /* attach the supporting struct to B_mpi for reuse */
5082   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5083   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5084   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5085   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5086   *mpimat = B_mpi;
5087 
5088   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5089   PetscFunctionReturn(0);
5090 }
5091 
5092 /*@C
5093       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5094                  matrices from each processor
5095 
5096     Collective
5097 
5098    Input Parameters:
5099 +    comm - the communicator the parallel matrix will live on
5100 .    seqmat - the input sequential matrix
5101 .    m - number of local rows (or PETSC_DECIDE)
5102 .    n - number of local columns (or PETSC_DECIDE)
5103 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 
5105    Output Parameter:
5106 .    mpimat - the parallel matrix generated
5107 
5108     Level: advanced
5109 
5110    Notes:
5111      The dimensions of the sequential matrix in each processor MUST be the same.
5112      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5113      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5114 @*/
5115 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5116 {
5117   PetscErrorCode ierr;
5118   PetscMPIInt    size;
5119 
5120   PetscFunctionBegin;
5121   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5122   if (size == 1) {
5123     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5124     if (scall == MAT_INITIAL_MATRIX) {
5125       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5126     } else {
5127       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5128     }
5129     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5130     PetscFunctionReturn(0);
5131   }
5132   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5133   if (scall == MAT_INITIAL_MATRIX) {
5134     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5135   }
5136   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5137   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5138   PetscFunctionReturn(0);
5139 }
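/*
   A minimal usage sketch of MatCreateMPIAIJSumSeqAIJ() (the names Aloc and Aglob are illustrative,
   not part of this source): each rank contributes its own MATSEQAIJ matrix of identical global
   dimensions, and the parallel sum is formed once and then refreshed with MAT_REUSE_MATRIX.

     Mat Aglob;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglob);CHKERRQ(ierr);
     ...  update the numerical values of Aloc, keeping its nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Aglob);CHKERRQ(ierr);
*/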
5140 
5141 /*@
5142      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5143           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5144           with MatGetSize().
5145 
5146     Not Collective
5147 
5148    Input Parameters:
5149 +    A - the matrix
5150 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5151 
5152    Output Parameter:
5153 .    A_loc - the local sequential matrix generated
5154 
5155     Level: developer
5156 
5157    Notes:
5158      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5159      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5160      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5161      modify the values of the returned A_loc.
5162 
5163 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5164 
5165 @*/
5166 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5167 {
5168   PetscErrorCode ierr;
5169   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5170   Mat_SeqAIJ     *mat,*a,*b;
5171   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5172   MatScalar      *aa,*ba,*cam;
5173   PetscScalar    *ca;
5174   PetscMPIInt    size;
5175   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5176   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5177   PetscBool      match;
5178 
5179   PetscFunctionBegin;
5180   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5181   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5182   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5183   if (size == 1) {
5184     if (scall == MAT_INITIAL_MATRIX) {
5185       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5186       *A_loc = mpimat->A;
5187     } else if (scall == MAT_REUSE_MATRIX) {
5188       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5189     }
5190     PetscFunctionReturn(0);
5191   }
5192 
5193   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5194   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5195   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5196   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5197   aa = a->a; ba = b->a;
5198   if (scall == MAT_INITIAL_MATRIX) {
5199     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5200     ci[0] = 0;
5201     for (i=0; i<am; i++) {
5202       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5203     }
5204     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5205     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5206     k    = 0;
5207     for (i=0; i<am; i++) {
5208       ncols_o = bi[i+1] - bi[i];
5209       ncols_d = ai[i+1] - ai[i];
5210       /* off-diagonal portion of A */
5211       for (jo=0; jo<ncols_o; jo++) {
5212         col = cmap[*bj];
5213         if (col >= cstart) break;
5214         cj[k]   = col; bj++;
5215         ca[k++] = *ba++;
5216       }
5217       /* diagonal portion of A */
5218       for (j=0; j<ncols_d; j++) {
5219         cj[k]   = cstart + *aj++;
5220         ca[k++] = *aa++;
5221       }
5222       /* off-diagonal portion of A */
5223       for (j=jo; j<ncols_o; j++) {
5224         cj[k]   = cmap[*bj++];
5225         ca[k++] = *ba++;
5226       }
5227     }
5228     /* put together the new matrix */
5229     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5230     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5231     /* Since these are PETSc arrays, change flags to free them as necessary. */
5232     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5233     mat->free_a  = PETSC_TRUE;
5234     mat->free_ij = PETSC_TRUE;
5235     mat->nonew   = 0;
5236   } else if (scall == MAT_REUSE_MATRIX) {
5237     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5238     ci = mat->i; cj = mat->j; cam = mat->a;
5239     for (i=0; i<am; i++) {
5240       /* off-diagonal portion of A */
5241       ncols_o = bi[i+1] - bi[i];
5242       for (jo=0; jo<ncols_o; jo++) {
5243         col = cmap[*bj];
5244         if (col >= cstart) break;
5245         *cam++ = *ba++; bj++;
5246       }
5247       /* diagonal portion of A */
5248       ncols_d = ai[i+1] - ai[i];
5249       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5250       /* off-diagonal portion of A */
5251       for (j=jo; j<ncols_o; j++) {
5252         *cam++ = *ba++; bj++;
5253       }
5254     }
5255   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5256   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5257   PetscFunctionReturn(0);
5258 }
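/*
   A minimal usage sketch of MatMPIAIJGetLocalMat() (A_loc is an illustrative name): the local matrix
   is created once with MAT_INITIAL_MATRIX and its values are refreshed later with MAT_REUSE_MATRIX,
   as described in the notes above.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ...  use A_loc, an mlocal by N SeqAIJ matrix ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/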
5259 
5260 /*@C
5261      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5262 
5263     Not Collective
5264 
5265    Input Parameters:
5266 +    A - the matrix
5267 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5268 -    row, col - index sets of rows and columns to extract (or NULL)
5269 
5270    Output Parameter:
5271 .    A_loc - the local sequential matrix generated
5272 
5273     Level: developer
5274 
5275 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5276 
5277 @*/
5278 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5279 {
5280   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5281   PetscErrorCode ierr;
5282   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5283   IS             isrowa,iscola;
5284   Mat            *aloc;
5285   PetscBool      match;
5286 
5287   PetscFunctionBegin;
5288   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5289   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5290   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5291   if (!row) {
5292     start = A->rmap->rstart; end = A->rmap->rend;
5293     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5294   } else {
5295     isrowa = *row;
5296   }
5297   if (!col) {
5298     start = A->cmap->rstart;
5299     cmap  = a->garray;
5300     nzA   = a->A->cmap->n;
5301     nzB   = a->B->cmap->n;
5302     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5303     ncols = 0;
5304     for (i=0; i<nzB; i++) {
5305       if (cmap[i] < start) idx[ncols++] = cmap[i];
5306       else break;
5307     }
5308     imark = i;
5309     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5310     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5311     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5312   } else {
5313     iscola = *col;
5314   }
5315   if (scall != MAT_INITIAL_MATRIX) {
5316     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5317     aloc[0] = *A_loc;
5318   }
5319   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5320   if (!col) { /* attach global id of condensed columns */
5321     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5322   }
5323   *A_loc = aloc[0];
5324   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5325   if (!row) {
5326     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5327   }
5328   if (!col) {
5329     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5330   }
5331   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5332   PetscFunctionReturn(0);
5333 }
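/*
   A minimal usage sketch of MatMPIAIJGetLocalMatCondensed() (A_loc is an illustrative name): passing
   NULL for row and col extracts all local rows and only this process's nonzero columns.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/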
5334 
5335 /*
5336  * Destroy a mat that may be composed with PetscSF communication objects.
5337  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5338  * */
5339 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5340 {
5341   PetscSF          sf,osf;
5342   IS               map;
5343   PetscErrorCode   ierr;
5344 
5345   PetscFunctionBegin;
5346   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5347   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5348   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5349   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5350   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5351   ierr = ISDestroy(&map);CHKERRQ(ierr);
5352   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5353   PetscFunctionReturn(0);
5354 }
5355 
5356 /*
5357  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched.
5358  * Rows can be local or remote. The routine is designed to be memory scalable so that nothing depends
5359  * on a global size.
5360  * */
5361 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5362 {
5363   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5364   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5365   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5366   PetscMPIInt              owner;
5367   PetscSFNode              *iremote,*oiremote;
5368   const PetscInt           *lrowindices;
5369   PetscErrorCode           ierr;
5370   PetscSF                  sf,osf;
5371   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5372   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5373   MPI_Comm                 comm;
5374   ISLocalToGlobalMapping   mapping;
5375 
5376   PetscFunctionBegin;
5377   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5378   /* plocalsize is the number of roots
5379    * nrows is the number of leaves
5380    * */
5381   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5382   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5383   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5384   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5385   for (i=0;i<nrows;i++) {
5386     /* Find a remote index and an owner for a row.
5387      * The row could be local or remote.
5388      * */
5389     owner = 0;
5390     lidx  = 0;
5391     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5392     iremote[i].index = lidx;
5393     iremote[i].rank  = owner;
5394   }
5395   /* Create SF to communicate how many nonzero columns for each row */
5396   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5397   /* SF will figure out the number of nonzero columns for each row, and their
5398    * offsets
5399    * */
5400   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5401   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5402   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5403 
5404   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5405   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5406   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5407   roffsets[0] = 0;
5408   roffsets[1] = 0;
5409   for (i=0;i<plocalsize;i++) {
5410     /* diag */
5411     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5412     /* off diag */
5413     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5414     /* compute offsets so that we know the relative location of each row */
5415     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5416     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5417   }
5418   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5419   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5420   /* 'r' means root, and 'l' means leaf */
5421   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5422   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5423   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5424   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5425   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5426   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5427   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5428   dntotalcols = 0;
5429   ontotalcols = 0;
5430   ncol = 0;
5431   for (i=0;i<nrows;i++) {
5432     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5433     ncol = PetscMax(pnnz[i],ncol);
5434     /* diag */
5435     dntotalcols += nlcols[i*2+0];
5436     /* off diag */
5437     ontotalcols += nlcols[i*2+1];
5438   }
5439   /* We do not need to figure out the exact number of columns
5440    * since all the calculations will be done by going through the raw data
5441    * */
5442   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5443   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5444   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5445   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5446   /* diag */
5447   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5448   /* off diag */
5449   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5450   /* diag */
5451   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5452   /* off diag */
5453   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5454   dntotalcols = 0;
5455   ontotalcols = 0;
5456   ntotalcols  = 0;
5457   for (i=0;i<nrows;i++) {
5458     owner = 0;
5459     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5460     /* Set iremote for diag matrix */
5461     for (j=0;j<nlcols[i*2+0];j++) {
5462       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5463       iremote[dntotalcols].rank    = owner;
5464       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5465       ilocal[dntotalcols++]        = ntotalcols++;
5466     }
5467     /* off diag */
5468     for (j=0;j<nlcols[i*2+1];j++) {
5469       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5470       oiremote[ontotalcols].rank    = owner;
5471       oilocal[ontotalcols++]        = ntotalcols++;
5472     }
5473   }
5474   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5475   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5476   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5477   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5478   /* P serves as roots and P_oth is leaves
5479    * Diag matrix
5480    * */
5481   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5482   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5483   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5484 
5485   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5486   /* Off diag */
5487   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5488   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5489   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5490   /* We operate on the matrix internal data to save memory */
5491   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5492   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5493   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5494   /* Convert to global indices for diag matrix */
5495   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5496   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5497   /* We want P_oth to store global indices */
5498   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5499   /* Use memory scalable approach */
5500   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5501   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5502   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5503   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5504   /* Convert back to local indices */
5505   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5506   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5507   nout = 0;
5508   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5509   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5510   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5511   /* Exchange values */
5512   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5513   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5514   /* Stop PETSc from shrinking memory */
5515   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5516   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5517   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5518   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5519   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5520   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5521   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5522   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5523   PetscFunctionReturn(0);
5524 }
5525 
5526 /*
5527  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A.
5528  * This supports MPIAIJ and MAIJ.
5529  * */
5530 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5531 {
5532   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5533   Mat_SeqAIJ            *p_oth;
5534   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5535   IS                    rows,map;
5536   PetscHMapI            hamp;
5537   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5538   MPI_Comm              comm;
5539   PetscSF               sf,osf;
5540   PetscBool             has;
5541   PetscErrorCode        ierr;
5542 
5543   PetscFunctionBegin;
5544   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5545   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5546   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5547    *  and then create a submatrix (that often is an overlapping matrix)
5548    * */
5549   if (reuse==MAT_INITIAL_MATRIX) {
5550     /* Use a hash table to figure out unique keys */
5551     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5552     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5553     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5554     count = 0;
5555     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5556     for (i=0;i<a->B->cmap->n;i++) {
5557       key  = a->garray[i]/dof;
5558       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5559       if (!has) {
5560         mapping[i] = count;
5561         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5562       } else {
5563         /* Current 'i' has the same value as the previous step */
5564         mapping[i] = count-1;
5565       }
5566     }
5567     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5568     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5569     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5570     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5571     off = 0;
5572     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5573     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5574     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5575     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5576     /* In case the matrix was already created but the user wants to recreate it */
5577     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5578     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5579     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5580     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5581   } else if (reuse==MAT_REUSE_MATRIX) {
5582     /* If the matrix was already created, we simply update values using the SF objects
5583      * that were attached to the matrix earlier.
5584      *  */
5585     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5586     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5587     if (!sf || !osf) {
5588       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5589     }
5590     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5591     /* Update values in place */
5592     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5593     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5594     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5595     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5596   } else {
5597     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5598   }
5599   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5600   PetscFunctionReturn(0);
5601 }
5602 
5603 /*@C
5604     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5605 
5606     Collective on Mat
5607 
5608    Input Parameters:
5609 +    A,B - the matrices in mpiaij format
5610 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5611 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5612 
5613    Output Parameters:
5614 +    rowb, colb - index sets of rows and columns of B to extract
5615 -    B_seq - the sequential matrix generated
5616 
5617     Level: developer
5618 
5619 @*/
5620 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5621 {
5622   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5623   PetscErrorCode ierr;
5624   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5625   IS             isrowb,iscolb;
5626   Mat            *bseq=NULL;
5627 
5628   PetscFunctionBegin;
5629   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5630     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5631   }
5632   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5633 
5634   if (scall == MAT_INITIAL_MATRIX) {
5635     start = A->cmap->rstart;
5636     cmap  = a->garray;
5637     nzA   = a->A->cmap->n;
5638     nzB   = a->B->cmap->n;
5639     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5640     ncols = 0;
5641     for (i=0; i<nzB; i++) {  /* row < local row index */
5642       if (cmap[i] < start) idx[ncols++] = cmap[i];
5643       else break;
5644     }
5645     imark = i;
5646     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5647     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5648     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5649     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5650   } else {
5651     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5652     isrowb  = *rowb; iscolb = *colb;
5653     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5654     bseq[0] = *B_seq;
5655   }
5656   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5657   *B_seq = bseq[0];
5658   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5659   if (!rowb) {
5660     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5661   } else {
5662     *rowb = isrowb;
5663   }
5664   if (!colb) {
5665     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5666   } else {
5667     *colb = iscolb;
5668   }
5669   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5670   PetscFunctionReturn(0);
5671 }
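/*
   A minimal usage sketch of MatGetBrowsOfAcols() (rowb, colb and B_seq are illustrative names): with
   MAT_INITIAL_MATRIX the index sets are created and returned so they can be passed back for reuse.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ...  the numerical values of B change, its nonzero structure does not ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
*/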
5672 
5673 /*
5674     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5675     of the OFF-DIAGONAL portion of local A
5676 
5677     Collective on Mat
5678 
5679    Input Parameters:
5680 +    A,B - the matrices in mpiaij format
5681 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5682 
5683    Output Parameters:
5684 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5685 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5686 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5687 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5688 
5689     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5690      for this matrix. This is not desirable.
5691 
5692     Level: developer
5693 
5694 */
5695 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5696 {
5697   PetscErrorCode         ierr;
5698   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5699   Mat_SeqAIJ             *b_oth;
5700   VecScatter             ctx;
5701   MPI_Comm               comm;
5702   const PetscMPIInt      *rprocs,*sprocs;
5703   const PetscInt         *srow,*rstarts,*sstarts;
5704   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5705   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5706   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5707   MPI_Request            *rwaits = NULL,*swaits = NULL;
5708   MPI_Status             rstatus;
5709   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5710 
5711   PetscFunctionBegin;
5712   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5713   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5714 
5715   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5716     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5717   }
5718   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5719   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5720 
5721   if (size == 1) {
5722     startsj_s = NULL;
5723     bufa_ptr  = NULL;
5724     *B_oth    = NULL;
5725     PetscFunctionReturn(0);
5726   }
5727 
5728   ctx = a->Mvctx;
5729   tag = ((PetscObject)ctx)->tag;
5730 
5731   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5732   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5733   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5734   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5735   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5736   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5737   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5738 
5739   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5740   if (scall == MAT_INITIAL_MATRIX) {
5741     /* i-array */
5742     /*---------*/
5743     /*  post receives */
5744     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5745     for (i=0; i<nrecvs; i++) {
5746       rowlen = rvalues + rstarts[i]*rbs;
5747       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5748       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5749     }
5750 
5751     /* pack the outgoing message */
5752     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5753 
5754     sstartsj[0] = 0;
5755     rstartsj[0] = 0;
5756     len         = 0; /* total length of j or a array to be sent */
5757     if (nsends) {
5758       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5759       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5760     }
5761     for (i=0; i<nsends; i++) {
5762       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5763       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5764       for (j=0; j<nrows; j++) {
5765         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5766         for (l=0; l<sbs; l++) {
5767           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5768 
5769           rowlen[j*sbs+l] = ncols;
5770 
5771           len += ncols;
5772           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5773         }
5774         k++;
5775       }
5776       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5777 
5778       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5779     }
5780     /* recvs and sends of i-array are completed */
5781     i = nrecvs;
5782     while (i--) {
5783       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5784     }
5785     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5786     ierr = PetscFree(svalues);CHKERRQ(ierr);
5787 
5788     /* allocate buffers for sending j and a arrays */
5789     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5790     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5791 
5792     /* create i-array of B_oth */
5793     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5794 
5795     b_othi[0] = 0;
5796     len       = 0; /* total length of j or a array to be received */
5797     k         = 0;
5798     for (i=0; i<nrecvs; i++) {
5799       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5800       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5801       for (j=0; j<nrows; j++) {
5802         b_othi[k+1] = b_othi[k] + rowlen[j];
5803         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5804         k++;
5805       }
5806       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5807     }
5808     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5809 
5810     /* allocate space for j and a arrays of B_oth */
5811     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5812     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5813 
5814     /* j-array */
5815     /*---------*/
5816     /*  post receives of j-array */
5817     for (i=0; i<nrecvs; i++) {
5818       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5819       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5820     }
5821 
5822     /* pack the outgoing message j-array */
5823     if (nsends) k = sstarts[0];
5824     for (i=0; i<nsends; i++) {
5825       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5826       bufJ  = bufj+sstartsj[i];
5827       for (j=0; j<nrows; j++) {
5828         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5829         for (ll=0; ll<sbs; ll++) {
5830           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5831           for (l=0; l<ncols; l++) {
5832             *bufJ++ = cols[l];
5833           }
5834           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5835         }
5836       }
5837       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5838     }
5839 
5840     /* recvs and sends of j-array are completed */
5841     i = nrecvs;
5842     while (i--) {
5843       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5844     }
5845     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5846   } else if (scall == MAT_REUSE_MATRIX) {
5847     sstartsj = *startsj_s;
5848     rstartsj = *startsj_r;
5849     bufa     = *bufa_ptr;
5850     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5851     b_otha   = b_oth->a;
5852   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5853 
5854   /* a-array */
5855   /*---------*/
5856   /*  post receives of a-array */
5857   for (i=0; i<nrecvs; i++) {
5858     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5859     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5860   }
5861 
5862   /* pack the outgoing message a-array */
5863   if (nsends) k = sstarts[0];
5864   for (i=0; i<nsends; i++) {
5865     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5866     bufA  = bufa+sstartsj[i];
5867     for (j=0; j<nrows; j++) {
5868       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5869       for (ll=0; ll<sbs; ll++) {
5870         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5871         for (l=0; l<ncols; l++) {
5872           *bufA++ = vals[l];
5873         }
5874         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5875       }
5876     }
5877     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5878   }
5879   /* recvs and sends of a-array are completed */
5880   i = nrecvs;
5881   while (i--) {
5882     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5883   }
5884   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5885   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5886 
5887   if (scall == MAT_INITIAL_MATRIX) {
5888     /* put together the new matrix */
5889     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5890 
5891     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5892     /* Since these are PETSc arrays, change flags to free them as necessary. */
5893     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5894     b_oth->free_a  = PETSC_TRUE;
5895     b_oth->free_ij = PETSC_TRUE;
5896     b_oth->nonew   = 0;
5897 
5898     ierr = PetscFree(bufj);CHKERRQ(ierr);
5899     if (!startsj_s || !bufa_ptr) {
5900       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5901       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5902     } else {
5903       *startsj_s = sstartsj;
5904       *startsj_r = rstartsj;
5905       *bufa_ptr  = bufa;
5906     }
5907   }
5908 
5909   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5910   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5911   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5912   PetscFunctionReturn(0);
5913 }
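/*
   A minimal usage sketch of MatGetBrowsOfAoCols_MPIAIJ() (the variable names are illustrative): the
   arrays returned with MAT_INITIAL_MATRIX are handed back unchanged when only the values of B change.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...  the numerical values of B change, its nonzero structure does not ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/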
5914 
5915 /*@C
5916   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5917 
5918   Not Collective
5919 
5920   Input Parameter:
5921 . A - The matrix in mpiaij format
5922 
5923   Output Parameters:
5924 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5925 . colmap - A map from global column index to local index into lvec
5926 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5927 
5928   Level: developer
5929 
5930 @*/
5931 #if defined(PETSC_USE_CTABLE)
5932 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5933 #else
5934 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5935 #endif
5936 {
5937   Mat_MPIAIJ *a;
5938 
5939   PetscFunctionBegin;
5940   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5941   PetscValidPointer(lvec, 2);
5942   PetscValidPointer(colmap, 3);
5943   PetscValidPointer(multScatter, 4);
5944   a = (Mat_MPIAIJ*) A->data;
5945   if (lvec) *lvec = a->lvec;
5946   if (colmap) *colmap = a->colmap;
5947   if (multScatter) *multScatter = a->Mvctx;
5948   PetscFunctionReturn(0);
5949 }
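/*
   A minimal usage sketch of MatGetCommunicationStructs() (the variable names are illustrative); the
   type of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE.

   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     Vec        lvec;
     VecScatter mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
*/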
5950 
5951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5954 #if defined(PETSC_HAVE_MKL_SPARSE)
5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5956 #endif
5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5959 #if defined(PETSC_HAVE_ELEMENTAL)
5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5961 #endif
5962 #if defined(PETSC_HAVE_HYPRE)
5963 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5964 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5965 #endif
5966 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5967 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5968 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5969 
5970 /*
5971     Computes (B'*A')' since computing B*A directly is untenable
5972 
5973                n                       p                          p
5974         (              )       (              )         (                  )
5975       m (      A       )  *  n (       B      )   =   m (         C        )
5976         (              )       (              )         (                  )
5977 
5978 */
5979 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5980 {
5981   PetscErrorCode ierr;
5982   Mat            At,Bt,Ct;
5983 
5984   PetscFunctionBegin;
5985   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5986   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5987   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5988   ierr = MatDestroy(&At);CHKERRQ(ierr);
5989   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5990   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5991   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5992   PetscFunctionReturn(0);
5993 }
5994 
5995 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5996 {
5997   PetscErrorCode ierr;
5998   PetscInt       m=A->rmap->n,n=B->cmap->n;
5999   Mat            Cmat;
6000 
6001   PetscFunctionBegin;
6002   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
6003   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
6004   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
6005   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
6006   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
6007   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
6008   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6009   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6010 
6011   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6012 
6013   *C = Cmat;
6014   PetscFunctionReturn(0);
6015 }
6016 
6017 /* ----------------------------------------------------------------*/
6018 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
6019 {
6020   PetscErrorCode ierr;
6021 
6022   PetscFunctionBegin;
6023   if (scall == MAT_INITIAL_MATRIX) {
6024     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
6025     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
6026     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
6027   }
6028   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
6029   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
6030   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
6031   PetscFunctionReturn(0);
6032 }
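/*
   A minimal usage sketch (C is an illustrative name): users reach the routine above through the
   generic MatMatMult() interface with a MATMPIDENSE A and a MATMPIAIJ B.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/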
6033 
6034 /*MC
6035    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6036 
6037    Options Database Keys:
6038 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6039 
6040    Level: beginner
6041 
6042    Notes:
6043     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6044     in this case the values associated with the rows and columns one passes in are set to zero
6045     in the matrix.
6046 
6047     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6048     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
6049 
6050 .seealso: MatCreateAIJ()
6051 M*/
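/*
   A minimal creation sketch for a MATMPIAIJ matrix (m, n and the preallocation counts are illustrative):
   set the type explicitly or rely on -mat_type mpiaij together with MatSetFromOptions().

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/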
6052 
6053 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6054 {
6055   Mat_MPIAIJ     *b;
6056   PetscErrorCode ierr;
6057   PetscMPIInt    size;
6058 
6059   PetscFunctionBegin;
6060   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6061 
6062   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6063   B->data       = (void*)b;
6064   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6065   B->assembled  = PETSC_FALSE;
6066   B->insertmode = NOT_SET_VALUES;
6067   b->size       = size;
6068 
6069   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6070 
6071   /* build cache for off array entries formed */
6072   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6073 
6074   b->donotstash  = PETSC_FALSE;
6075   b->colmap      = 0;
6076   b->garray      = 0;
6077   b->roworiented = PETSC_TRUE;
6078 
6079   /* stuff used for matrix vector multiply */
6080   b->lvec  = NULL;
6081   b->Mvctx = NULL;
6082 
6083   /* stuff for MatGetRow() */
6084   b->rowindices   = 0;
6085   b->rowvalues    = 0;
6086   b->getrowactive = PETSC_FALSE;
6087 
6088   /* flexible pointer used in CUSP/CUSPARSE classes */
6089   b->spptr = NULL;
6090 
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6101 #if defined(PETSC_HAVE_MKL_SPARSE)
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6103 #endif
6104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6107 #if defined(PETSC_HAVE_ELEMENTAL)
6108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6109 #endif
6110 #if defined(PETSC_HAVE_HYPRE)
6111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6112 #endif
6113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6117   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6118 #if defined(PETSC_HAVE_HYPRE)
6119   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6120 #endif
6121   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6122   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6123   PetscFunctionReturn(0);
6124 }
6125 
6126 /*@C
6127      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6128          and "off-diagonal" part of the matrix in CSR format.
6129 
6130    Collective
6131 
6132    Input Parameters:
6133 +  comm - MPI communicator
6134 .  m - number of local rows (Cannot be PETSC_DECIDE)
6135 .  n - This value should be the same as the local size used in creating the
6136        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6137        it calculated if N is given) For square matrices n is almost always m.
6138 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6139 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6140 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6141 .   j - column indices
6142 .   a - matrix values
6143 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6144 .   oj - column indices
6145 -   oa - matrix values
6146 
6147    Output Parameter:
6148 .   mat - the matrix
6149 
6150    Level: advanced
6151 
6152    Notes:
6153        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6154        must free the arrays once the matrix has been destroyed and not before.
6155 
6156        The i and j indices are 0 based
6157 
6158        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6159 
6160        This sets local rows and cannot be used to set off-processor values.
6161 
6162        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6163        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6164        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6165        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6166        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6167        communication if it is known that only local entries will be set.
6168 
6169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6170           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6171 @*/
6172 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6173 {
6174   PetscErrorCode ierr;
6175   Mat_MPIAIJ     *maij;
6176 
6177   PetscFunctionBegin;
6178   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6179   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6180   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6181   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6182   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6183   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6184   maij = (Mat_MPIAIJ*) (*mat)->data;
6185 
6186   (*mat)->preallocated = PETSC_TRUE;
6187 
6188   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6189   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6190 
6191   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6192   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6193 
6194   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6195   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6196   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6197   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6198 
6199   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6200   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6201   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6202   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6203   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6204   PetscFunctionReturn(0);
6205 }
6206 
6207 /*
6208     Special version for direct calls from Fortran
6209 */
6210 #include <petsc/private/fortranimpl.h>
6211 
6212 /* Change these macros so can be used in void function */
6213 #undef CHKERRQ
6214 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6215 #undef SETERRQ2
6216 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6217 #undef SETERRQ3
6218 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6219 #undef SETERRQ
6220 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6221 
6222 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6223 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6224 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6225 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6226 #else
6227 #endif
6228 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6229 {
6230   Mat            mat  = *mmat;
6231   PetscInt       m    = *mm, n = *mn;
6232   InsertMode     addv = *maddv;
6233   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6234   PetscScalar    value;
6235   PetscErrorCode ierr;
6236 
6237   MatCheckPreallocated(mat,1);
6238   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6239 
6240 #if defined(PETSC_USE_DEBUG)
6241   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6242 #endif
6243   {
6244     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6245     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6246     PetscBool roworiented = aij->roworiented;
6247 
6248     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
6249     Mat        A                    = aij->A;
6250     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6251     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6252     MatScalar  *aa                  = a->a;
6253     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6254     Mat        B                    = aij->B;
6255     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6256     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6257     MatScalar  *ba                  = b->a;
6258     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it unconditionally because we
6259      * cannot use "#if defined" inside a macro. */
6260     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6261 
6262     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6263     PetscInt  nonew = a->nonew;
6264     MatScalar *ap1,*ap2;
6265 
6266     PetscFunctionBegin;
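    /* Loop over the rows to be set: locally owned rows are inserted directly into the diagonal (A) or off-diagonal (B)
       sequential blocks through the MatSetValues_SeqAIJ_A_Private()/MatSetValues_SeqAIJ_B_Private() macros, while rows
       owned by other processes are stashed and communicated later during matrix assembly */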
6267     for (i=0; i<m; i++) {
6268       if (im[i] < 0) continue;
6269 #if defined(PETSC_USE_DEBUG)
6270       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6271 #endif
6272       if (im[i] >= rstart && im[i] < rend) {
6273         row      = im[i] - rstart;
6274         lastcol1 = -1;
6275         rp1      = aj + ai[row];
6276         ap1      = aa + ai[row];
6277         rmax1    = aimax[row];
6278         nrow1    = ailen[row];
6279         low1     = 0;
6280         high1    = nrow1;
6281         lastcol2 = -1;
6282         rp2      = bj + bi[row];
6283         ap2      = ba + bi[row];
6284         rmax2    = bimax[row];
6285         nrow2    = bilen[row];
6286         low2     = 0;
6287         high2    = nrow2;
6288 
6289         for (j=0; j<n; j++) {
6290           if (roworiented) value = v[i*n+j];
6291           else value = v[i+j*m];
6292           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6293           if (in[j] >= cstart && in[j] < cend) {
6294             col = in[j] - cstart;
6295             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6296 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6297             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6298 #endif
6299           } else if (in[j] < 0) continue;
6300 #if defined(PETSC_USE_DEBUG)
6301           /* the extra braces around SETERRQ2() are required for --with-errorchecking=0 because of the 'else' clause that follows */
6302           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6303 #endif
6304           else {
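            /* the column belongs to the off-diagonal block B; if the matrix has been assembled before, translate the
               global column index through colmap (B then uses a compressed local column numbering), otherwise B is
               still addressed by global column indices */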
6305             if (mat->was_assembled) {
6306               if (!aij->colmap) {
6307                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6308               }
6309 #if defined(PETSC_USE_CTABLE)
6310               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6311               col--;
6312 #else
6313               col = aij->colmap[in[j]] - 1;
6314 #endif
6315               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6316                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6317                 col  =  in[j];
6318                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6319                 B        = aij->B;
6320                 b        = (Mat_SeqAIJ*)B->data;
6321                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6322                 rp2      = bj + bi[row];
6323                 ap2      = ba + bi[row];
6324                 rmax2    = bimax[row];
6325                 nrow2    = bilen[row];
6326                 low2     = 0;
6327                 high2    = nrow2;
6328                 bm       = aij->B->rmap->n;
6329                 ba       = b->a;
6330                 inserted = PETSC_FALSE;
6331               }
6332             } else col = in[j];
6333             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6334 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6335             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6336 #endif
6337           }
6338         }
6339       } else if (!aij->donotstash) {
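        /* the row is owned by another process: stash the values so they can be sent to the owning process during
           MatAssemblyBegin()/MatAssemblyEnd() */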
6340         if (roworiented) {
6341           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6342         } else {
6343           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6344         }
6345       }
6346     }
6347   }
6348   PetscFunctionReturnVoid();
6349 }
6350