xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1873fc9fedfc3bd8fc2f82ccd29ab8b44a80f1db)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the format also automatically
23    switches over to use inodes when enough consecutive rows share the same nonzero structure.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
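
/*
   Example (a minimal sketch of the recommendation above, not taken from the PETSc
   examples; the matrix A, its sizes, and the preallocation counts are placeholders,
   and PetscInitialize()/PetscFinalize() plus declarations are assumed):

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     // Call both preallocation routines; the one that does not match the actual
     // type is a no-op, so the same code works on one process and on many.
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/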
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
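
/*
   Example (a sketch of run-time selection; assumes the application calls
   MatSetFromOptions() before assembling the matrix):

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   then run the program with -mat_type aijcrl to select this format.
*/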
46 
47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->pinnedtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
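  /* accumulate the local contribution of every stored nonzero into a dense work
     array of global length n: the diagonal block contributes at global column
     A->cmap->rstart + local column, the off-diagonal block through the global
     column indices stored in garray */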
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
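
/*
   Usage sketch (an illustration under assumptions, not from the PETSc sources:
   gmat is a square MATSEQAIJ whose entries are significant on process 0, and
   each process passes a local row count m whose sum is the global size):

     Mat      dist;
     PetscInt m = PETSC_DECIDE,M;    // M = global number of rows of gmat

     ierr = PetscSplitOwnership(PETSC_COMM_WORLD,&m,&M);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     // later, move fresh numerical values from process 0 into the same layout
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/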
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal nonzeros in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal nonzeros in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
424 at a slightly higher hash-table lookup cost; without it, it is not scalable
425 (each process holds an order-N integer array) but access is fast.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
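
/*
   Lookup sketch (mirrors how the colmap built above is consumed later in this
   file; gcol is a hypothetical global column index, and col becomes the local
   column in the off-diagonal block B, or negative if gcol is not present):

     PetscInt col;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;    // the table stores col+1 so that 0 can mean "not found"
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/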
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           goto a_noinsert; \
468         } \
469       }  \
470       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
471       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
472       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
473       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
474       N = nrow1++ - 1; a->nz++; high1++; \
475       /* shift up all the later entries in this row */ \
476       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
477       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
478       rp1[_i] = col;  \
479       ap1[_i] = value;  \
480       A->nonzerostate++;\
481       a_noinsert: ; \
482       ailen[row] = nrow1; \
483 }
484 
485 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
486   { \
487     if (col <= lastcol2) low2 = 0;                        \
488     else high2 = nrow2;                                   \
489     lastcol2 = col;                                       \
490     while (high2-low2 > 5) {                              \
491       t = (low2+high2)/2;                                 \
492       if (rp2[t] > col) high2 = t;                        \
493       else             low2  = t;                         \
494     }                                                     \
495     for (_i=low2; _i<high2; _i++) {                       \
496       if (rp2[_i] > col) break;                           \
497       if (rp2[_i] == col) {                               \
498         if (addv == ADD_VALUES) {                         \
499           ap2[_i] += value;                               \
500           (void)PetscLogFlops(1.0);                       \
501         }                                                 \
502         else                    ap2[_i] = value;          \
503         goto b_noinsert;                                  \
504       }                                                   \
505     }                                                     \
506     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
507     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
508     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
509     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
510     N = nrow2++ - 1; b->nz++; high2++;                    \
511     /* shift up all the later entries in this row */      \
512     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
513     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
514     rp2[_i] = col;                                        \
515     ap2[_i] = value;                                      \
516     B->nonzerostate++;                                    \
517     b_noinsert: ;                                         \
518     bilen[row] = nrow2;                                   \
519   }
520 
521 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
522 {
523   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
524   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
525   PetscErrorCode ierr;
526   PetscInt       l,*garray = mat->garray,diag;
527 
528   PetscFunctionBegin;
529   /* code only works for square matrices A */
530 
531   /* find size of row to the left of the diagonal part */
532   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
533   row  = row - diag;
534   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
535     if (garray[b->j[b->i[row]+l]] > diag) break;
536   }
537   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
538 
539   /* diagonal part */
540   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
541 
542   /* right of diagonal part */
543   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
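
/*
   Layout sketch for MatSetValuesRow_MPIAIJ() above (hypothetical numbers): v
   must hold the complete local row in ascending global column order.  With
   ownership range [10,20) and a row whose global columns are {2,7,12,15,25},
   v = {v_2,v_7,v_12,v_15,v_25}; the first loop finds l = 2, so v_2,v_7 are
   copied into B, v_12,v_15 into A, and v_25 back into B.
*/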
546 
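/*
   Values for locally owned rows are routed to the diagonal block aij->A (owned
   columns) or to the off-diagonal block aij->B (all other columns, translated
   through the colmap); values for rows owned elsewhere are queued in the stash
   and exchanged during MatAssemblyBegin()/MatAssemblyEnd().
*/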
547 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
548 {
549   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
550   PetscScalar    value = 0.0;
551   PetscErrorCode ierr;
552   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
553   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
554   PetscBool      roworiented = aij->roworiented;
555 
556   /* Some Variables required in the macro */
557   Mat        A                 = aij->A;
558   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
559   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
560   MatScalar  *aa               = a->a;
561   PetscBool  ignorezeroentries = a->ignorezeroentries;
562   Mat        B                 = aij->B;
563   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
564   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
565   MatScalar  *ba               = b->a;
566 
567   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
568   PetscInt  nonew;
569   MatScalar *ap1,*ap2;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (im[i] < 0) continue;
574 #if defined(PETSC_USE_DEBUG)
575     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
576 #endif
577     if (im[i] >= rstart && im[i] < rend) {
578       row      = im[i] - rstart;
579       lastcol1 = -1;
580       rp1      = aj + ai[row];
581       ap1      = aa + ai[row];
582       rmax1    = aimax[row];
583       nrow1    = ailen[row];
584       low1     = 0;
585       high1    = nrow1;
586       lastcol2 = -1;
587       rp2      = bj + bi[row];
588       ap2      = ba + bi[row];
589       rmax2    = bimax[row];
590       nrow2    = bilen[row];
591       low2     = 0;
592       high2    = nrow2;
593 
594       for (j=0; j<n; j++) {
595         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
596         if (in[j] >= cstart && in[j] < cend) {
597           col   = in[j] - cstart;
598           nonew = a->nonew;
599           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
600           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
601         } else if (in[j] < 0) continue;
602 #if defined(PETSC_USE_DEBUG)
603         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
604 #endif
605         else {
606           if (mat->was_assembled) {
607             if (!aij->colmap) {
608               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
609             }
610 #if defined(PETSC_USE_CTABLE)
611             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
612             col--;
613 #else
614             col = aij->colmap[in[j]] - 1;
615 #endif
616             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
617               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
618               col  =  in[j];
619               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
620               B     = aij->B;
621               b     = (Mat_SeqAIJ*)B->data;
622               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
623               rp2   = bj + bi[row];
624               ap2   = ba + bi[row];
625               rmax2 = bimax[row];
626               nrow2 = bilen[row];
627               low2  = 0;
628               high2 = nrow2;
629               bm    = aij->B->rmap->n;
630               ba    = b->a;
631             } else if (col < 0) {
632               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
633                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
634               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
635             }
636           } else col = in[j];
637           nonew = b->nonew;
638           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
639         }
640       }
641     } else {
642       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
643       if (!aij->donotstash) {
644         mat->assembled = PETSC_FALSE;
645         if (roworiented) {
646           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
647         } else {
648           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
649         }
650       }
651     }
652   }
653   PetscFunctionReturn(0);
654 }
655 
656 /*
657     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
658     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
659     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
660 */
661 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
662 {
663   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
664   Mat            A           = aij->A; /* diagonal part of the matrix */
665   Mat            B           = aij->B; /* offdiagonal part of the matrix */
666   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
667   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
668   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
669   PetscInt       *ailen      = a->ilen,*aj = a->j;
670   PetscInt       *bilen      = b->ilen,*bj = b->j;
671   PetscInt       am          = aij->A->rmap->n,j;
672   PetscInt       diag_so_far = 0,dnz;
673   PetscInt       offd_so_far = 0,onz;
674 
675   PetscFunctionBegin;
676   /* Iterate over all rows of the matrix */
677   for (j=0; j<am; j++) {
678     dnz = onz = 0;
679     /*  Iterate over all non-zero columns of the current row */
680     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
681       /* If column is in the diagonal */
682       if (mat_j[col] >= cstart && mat_j[col] < cend) {
683         aj[diag_so_far++] = mat_j[col] - cstart;
684         dnz++;
685       } else { /* off-diagonal entries */
686         bj[offd_so_far++] = mat_j[col];
687         onz++;
688       }
689     }
690     ailen[j] = dnz;
691     bilen[j] = onz;
692   }
693   PetscFunctionReturn(0);
694 }
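
/*
   Worked example for the symbolic copy above (hypothetical data): with
   cstart = 2, cend = 4 and the local CSR input

     mat_i = {0,3,5},  mat_j = {0,2,5, 3,4}

   row 0 puts column 2 in the diagonal block and columns 0,5 in the off-diagonal
   block, and row 1 puts column 3 in the diagonal block and column 4 in the
   off-diagonal block, so the loop produces

     aj = {0,1},   ailen = {1,1}   (local column indices, shifted by cstart)
     bj = {0,5,4}, bilen = {2,1}   (still global column indices at this stage)
*/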
695 
696 /*
697     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
698     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
699     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
700     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
701     would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
702 */
703 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
704 {
705   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
706   Mat            A      = aij->A; /* diagonal part of the matrix */
707   Mat            B      = aij->B; /* offdiagonal part of the matrix */
708   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
709   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
710   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
711   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
712   PetscInt       *ailen = a->ilen,*aj = a->j;
713   PetscInt       *bilen = b->ilen,*bj = b->j;
714   PetscInt       am     = aij->A->rmap->n,j;
715   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
716   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
717   PetscScalar    *aa = a->a,*ba = b->a;
718 
719   PetscFunctionBegin;
720   /* Iterate over all rows of the matrix */
721   for (j=0; j<am; j++) {
722     dnz_row = onz_row = 0;
723     rowstart_offd = full_offd_i[j];
724     rowstart_diag = full_diag_i[j];
725     /*  Iterate over all non-zero columns of the current row */
726     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
727       /* If column is in the diagonal */
728       if (mat_j[col] >= cstart && mat_j[col] < cend) {
729         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
730         aa[rowstart_diag+dnz_row] = mat_a[col];
731         dnz_row++;
732       } else { /* off-diagonal entries */
733         bj[rowstart_offd+onz_row] = mat_j[col];
734         ba[rowstart_offd+onz_row] = mat_a[col];
735         onz_row++;
736       }
737     }
738     ailen[j] = dnz_row;
739     bilen[j] = onz_row;
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
745 {
746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
749   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
750 
751   PetscFunctionBegin;
752   for (i=0; i<m; i++) {
753     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
754     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
755     if (idxm[i] >= rstart && idxm[i] < rend) {
756       row = idxm[i] - rstart;
757       for (j=0; j<n; j++) {
758         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
759         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
760         if (idxn[j] >= cstart && idxn[j] < cend) {
761           col  = idxn[j] - cstart;
762           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
763         } else {
764           if (!aij->colmap) {
765             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
766           }
767 #if defined(PETSC_USE_CTABLE)
768           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
769           col--;
770 #else
771           col = aij->colmap[idxn[j]] - 1;
772 #endif
773           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
774           else {
775             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
776           }
777         }
778       }
779     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
785 
786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
787 {
788   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
789   PetscErrorCode ierr;
790   PetscInt       nstash,reallocs;
791 
792   PetscFunctionBegin;
793   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
794 
795   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
796   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
797   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
798   PetscFunctionReturn(0);
799 }
800 
801 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
805   PetscErrorCode ierr;
806   PetscMPIInt    n;
807   PetscInt       i,j,rstart,ncols,flg;
808   PetscInt       *row,*col;
809   PetscBool      other_disassembled;
810   PetscScalar    *val;
811 
812   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
813 
814   PetscFunctionBegin;
815   if (!aij->donotstash && !mat->nooffprocentries) {
816     while (1) {
817       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
818       if (!flg) break;
819 
820       for (i=0; i<n; ) {
821         /* Now identify the consecutive vals belonging to the same row */
822         for (j=i,rstart=row[j]; j<n; j++) {
823           if (row[j] != rstart) break;
824         }
825         if (j < n) ncols = j-i;
826         else       ncols = n-i;
827         /* Now assemble all these values with a single function call */
828         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
829 
830         i = j;
831       }
832     }
833     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
834   }
835 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
836   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
837 #endif
838   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
839   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
840 
841   /* determine if any processor has disassembled; if so we must
842      also disassemble ourselves, in order that we may reassemble. */
843   /*
844      if the nonzero structure of submatrix B cannot change then we know that
845      no processor disassembled, thus we can skip this step
846   */
847   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
848     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
849     if (mat->was_assembled && !other_disassembled) {
850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
851       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
852 #endif
853       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
854     }
855   }
856   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
857     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
858   }
859   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
861   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
862 #endif
863   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
864   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
865 
866   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
867 
868   aij->rowvalues = 0;
869 
870   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
871   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
872 
873   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
874   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
875     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
876     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
877   }
878 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
879   mat->offloadmask = PETSC_OFFLOAD_BOTH;
880 #endif
881   PetscFunctionReturn(0);
882 }
883 
884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
885 {
886   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
891   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
896 {
897   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
898   PetscObjectState sA, sB;
899   PetscInt        *lrows;
900   PetscInt         r, len;
901   PetscBool        cong, lch, gch;
902   PetscErrorCode   ierr;
903 
904   PetscFunctionBegin;
905   /* get locally owned rows */
906   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
907   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
908   /* fix right hand side if needed */
909   if (x && b) {
910     const PetscScalar *xx;
911     PetscScalar       *bb;
912 
913     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
914     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
916     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
917     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
918     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
919   }
920 
921   sA = mat->A->nonzerostate;
922   sB = mat->B->nonzerostate;
923 
924   if (diag != 0.0 && cong) {
925     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
926     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
927   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
928     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
929     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
930     PetscInt   nnwA, nnwB;
931     PetscBool  nnzA, nnzB;
932 
933     nnwA = aijA->nonew;
934     nnwB = aijB->nonew;
935     nnzA = aijA->keepnonzeropattern;
936     nnzB = aijB->keepnonzeropattern;
937     if (!nnzA) {
938       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
939       aijA->nonew = 0;
940     }
941     if (!nnzB) {
942       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
943       aijB->nonew = 0;
944     }
945     /* Must zero here before the next loop */
946     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
947     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
948     for (r = 0; r < len; ++r) {
949       const PetscInt row = lrows[r] + A->rmap->rstart;
950       if (row >= A->cmap->N) continue;
951       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
952     }
953     aijA->nonew = nnwA;
954     aijB->nonew = nnwB;
955   } else {
956     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
957     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
958   }
959   ierr = PetscFree(lrows);CHKERRQ(ierr);
960   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
961   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962 
963   /* reduce nonzerostate */
964   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
965   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
966   if (gch) A->nonzerostate++;
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
971 {
972   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode    ierr;
974   PetscMPIInt       n = A->rmap->n;
975   PetscInt          i,j,r,m,len = 0;
976   PetscInt          *lrows,*owners = A->rmap->range;
977   PetscMPIInt       p = 0;
978   PetscSFNode       *rrows;
979   PetscSF           sf;
980   const PetscScalar *xx;
981   PetscScalar       *bb,*mask;
982   Vec               xmask,lmask;
983   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
984   const PetscInt    *aj, *ii,*ridx;
985   PetscScalar       *aa;
986 
987   PetscFunctionBegin;
988   /* Create SF where leaves are input rows and roots are owned rows */
989   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
990   for (r = 0; r < n; ++r) lrows[r] = -1;
991   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
992   for (r = 0; r < N; ++r) {
993     const PetscInt idx   = rows[r];
994     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
995     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
996       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
997     }
998     rrows[r].rank  = p;
999     rrows[r].index = rows[r] - owners[p];
1000   }
1001   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1002   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1003   /* Collect flags for rows to be zeroed */
1004   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1005   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1006   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1007   /* Compress and put in row numbers */
1008   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1009   /* zero diagonal part of matrix */
1010   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1011   /* handle off diagonal part of matrix */
1012   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1013   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1014   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1015   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1016   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1017   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1018   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1019   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1020   if (x && b) { /* this code is buggy when the row and column layout don't match */
1021     PetscBool cong;
1022 
1023     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1024     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1025     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1028     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1029   }
1030   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1031   /* remove zeroed rows of off diagonal matrix */
1032   ii = aij->i;
1033   for (i=0; i<len; i++) {
1034     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1035   }
1036   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1037   if (aij->compressedrow.use) {
1038     m    = aij->compressedrow.nrows;
1039     ii   = aij->compressedrow.i;
1040     ridx = aij->compressedrow.rindex;
1041     for (i=0; i<m; i++) {
1042       n  = ii[i+1] - ii[i];
1043       aj = aij->j + ii[i];
1044       aa = aij->a + ii[i];
1045 
1046       for (j=0; j<n; j++) {
1047         if (PetscAbsScalar(mask[*aj])) {
1048           if (b) bb[*ridx] -= *aa*xx[*aj];
1049           *aa = 0.0;
1050         }
1051         aa++;
1052         aj++;
1053       }
1054       ridx++;
1055     }
1056   } else { /* do not use compressed row format */
1057     m = l->B->rmap->n;
1058     for (i=0; i<m; i++) {
1059       n  = ii[i+1] - ii[i];
1060       aj = aij->j + ii[i];
1061       aa = aij->a + ii[i];
1062       for (j=0; j<n; j++) {
1063         if (PetscAbsScalar(mask[*aj])) {
1064           if (b) bb[i] -= *aa*xx[*aj];
1065           *aa = 0.0;
1066         }
1067         aa++;
1068         aj++;
1069       }
1070     }
1071   }
1072   if (x && b) {
1073     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1074     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1075   }
1076   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1077   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1078   ierr = PetscFree(lrows);CHKERRQ(ierr);
1079 
1080   /* only change matrix nonzero state if pattern was allowed to be changed */
1081   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1082     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1083     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1089 {
1090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1091   PetscErrorCode ierr;
1092   PetscInt       nt;
1093   VecScatter     Mvctx = a->Mvctx;
1094 
1095   PetscFunctionBegin;
1096   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1097   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1098 
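  /* overlap communication with computation: start gathering the ghost values
     needed by the off-diagonal block, form the purely local product
     yy = A_loc*xx while the scatter is in flight, then add B*lvec */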
1099   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1100   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1101   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1102   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1107 {
1108   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   VecScatter     Mvctx = a->Mvctx;
1121 
1122   PetscFunctionBegin;
1123   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1124   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1125   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1126   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1127   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   /* do nondiagonal part */
1138   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1139   /* do local part */
1140   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141   /* add partial results together */
1142   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1148 {
1149   MPI_Comm       comm;
1150   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1151   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1152   IS             Me,Notme;
1153   PetscErrorCode ierr;
1154   PetscInt       M,N,first,last,*notme,i;
1155   PetscBool      lf;
1156   PetscMPIInt    size;
1157 
1158   PetscFunctionBegin;
1159   /* Easy test: symmetric diagonal block */
1160   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1161   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1162   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1163   if (!*f) PetscFunctionReturn(0);
1164   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1166   if (size == 1) PetscFunctionReturn(0);
1167 
1168   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1169   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1170   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1171   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1172   for (i=0; i<first; i++) notme[i] = i;
1173   for (i=last; i<M; i++) notme[i-last+first] = i;
1174   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1175   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1176   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1177   Aoff = Aoffs[0];
1178   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1179   Boff = Boffs[0];
1180   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1181   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1182   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1183   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1184   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1185   ierr = PetscFree(notme);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1190 {
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1199 {
1200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   /* do nondiagonal part */
1205   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* add partial results together */
1209   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 /*
1215   This only works correctly for square matrices where the subblock A->A is the
1216    diagonal block
1217 */
1218 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1219 {
1220   PetscErrorCode ierr;
1221   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1222 
1223   PetscFunctionBegin;
1224   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1225   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1226   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1231 {
1232   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1237   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1242 {
1243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247 #if defined(PETSC_USE_LOG)
1248   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1249 #endif
1250   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1251   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1252   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1253   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1254 #if defined(PETSC_USE_CTABLE)
1255   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1256 #else
1257   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1258 #endif
1259   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1260   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1261   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1262   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1263   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1264   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1265   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1266 
1267   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1277 #if defined(PETSC_HAVE_ELEMENTAL)
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1279 #endif
1280 #if defined(PETSC_HAVE_HYPRE)
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1283 #endif
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1290 {
1291   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1292   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1293   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1294   PetscErrorCode ierr;
1295   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1296   int            fd;
1297   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1298   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1299   PetscScalar    *column_values;
1300   PetscInt       message_count,flowcontrolcount;
1301   FILE           *file;
1302 
1303   PetscFunctionBegin;
1304   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1305   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1306   nz   = A->nz + B->nz;
1307   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312 
1313     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen  = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1319   } else {
1320     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     rlen = mat->rmap->n;
1322   }
1323 
1324   /* load up the local row counts */
1325   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1326   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1327 
1328   /* store the row lengths to the file */
1329   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1330   if (!rank) {
1331     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     for (i=1; i<size; i++) {
1333       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1334       rlen = range[i+1] - range[i];
1335       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     }
1338     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1339   } else {
1340     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1341     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1342     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1343   }
1344   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1345 
1346   /* load up the local column indices */
1347   nzmax = nz; /* on rank 0 the reduction below sets nzmax to the largest nz over all ranks; other ranks only need room for their own entries */
1348   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1349   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1350   cnt   = 0;
1351   for (i=0; i<mat->rmap->n; i++) {
1352     for (j=B->i[i]; j<B->i[i+1]; j++) {
1353       if ((col = garray[B->j[j]]) > cstart) break;
1354       column_indices[cnt++] = col;
1355     }
1356     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1357     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1358   }
1359   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1360 
1361   /* store the column indices to the file */
1362   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1363   if (!rank) {
1364     MPI_Status status;
1365     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1366     for (i=1; i<size; i++) {
1367       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1368       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1369       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1370       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1371       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1372     }
1373     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1374   } else {
1375     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1376     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1377     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1378     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1379   }
1380   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1381 
1382   /* load up the local column values */
1383   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1384   cnt  = 0;
1385   for (i=0; i<mat->rmap->n; i++) {
1386     for (j=B->i[i]; j<B->i[i+1]; j++) {
1387       if (garray[B->j[j]] > cstart) break;
1388       column_values[cnt++] = B->a[j];
1389     }
1390     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1391     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1392   }
1393   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1394 
1395   /* store the column values to the file */
1396   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1397   if (!rank) {
1398     MPI_Status status;
1399     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1400     for (i=1; i<size; i++) {
1401       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1402       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1403       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1404       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1405       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     }
1407     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1408   } else {
1409     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1410     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1411     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1412     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1413   }
1414   ierr = PetscFree(column_values);CHKERRQ(ierr);
1415 
1416   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1417   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1418   PetscFunctionReturn(0);
1419 }
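
/*
   Usage sketch (illustrative): writing a parallel matrix through the
   flow-controlled binary path above. Any assembled MATMPIAIJ matrix A will do;
   the file name "A.dat" is arbitrary.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/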
1420 
1421 #include <petscdraw.h>
1422 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1423 {
1424   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1425   PetscErrorCode    ierr;
1426   PetscMPIInt       rank = aij->rank,size = aij->size;
1427   PetscBool         isdraw,iascii,isbinary;
1428   PetscViewer       sviewer;
1429   PetscViewerFormat format;
1430 
1431   PetscFunctionBegin;
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1435   if (iascii) {
1436     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1437     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1438       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1439       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1440       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1441       for (i=0; i<(PetscInt)size; i++) {
1442         nmax = PetscMax(nmax,nz[i]);
1443         nmin = PetscMin(nmin,nz[i]);
1444         navg += nz[i];
1445       }
1446       ierr = PetscFree(nz);CHKERRQ(ierr);
1447       navg = navg/size;
1448       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1449       PetscFunctionReturn(0);
1450     }
1451     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1452     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1453       MatInfo   info;
1454       PetscBool inodes;
1455 
1456       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1457       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1458       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1460       if (!inodes) {
1461         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1462                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1463       } else {
1464         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1465                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1466       }
1467       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1468       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1469       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1470       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1471       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1472       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1474       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1475       PetscFunctionReturn(0);
1476     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1477       PetscInt inodecount,inodelimit,*inodes;
1478       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1479       if (inodes) {
1480         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1481       } else {
1482         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1483       }
1484       PetscFunctionReturn(0);
1485     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1486       PetscFunctionReturn(0);
1487     }
1488   } else if (isbinary) {
1489     if (size == 1) {
1490       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1491       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1492     } else {
1493       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1494     }
1495     PetscFunctionReturn(0);
1500   } else if (isdraw) {
1501     PetscDraw draw;
1502     PetscBool isnull;
1503     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1504     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1505     if (isnull) PetscFunctionReturn(0);
1506   }
1507 
1508   { /* assemble the entire matrix onto first processor */
1509     Mat A = NULL, Av;
1510     IS  isrow,iscol;
1511 
1512     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1513     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1514     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1515     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1516 /* The commented-out code below does the same job using MatCreateSubMatrices() */
1517 /*
1518     Mat *AA, A = NULL, Av;
1519     IS  isrow,iscol;
1520 
1521     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1522     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1523     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1524     if (!rank) {
1525        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1526        A    = AA[0];
1527        Av   = AA[0];
1528     }
1529     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1530 */
1531     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1532     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1533     /*
1534        Every process must participate in the viewing call below, because the graphics
1535        waits are synchronized across all processes that share the PetscDraw object
1536     */
1537     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1538     if (!rank) {
1539       if (((PetscObject)mat)->name) {
1540         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1541       }
1542       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1543     }
1544     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1545     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1546     ierr = MatDestroy(&A);CHKERRQ(ierr);
1547   }
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1552 {
1553   PetscErrorCode ierr;
1554   PetscBool      iascii,isdraw,issocket,isbinary;
1555 
1556   PetscFunctionBegin;
1557   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1558   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1559   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1560   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1561   if (iascii || isdraw || isbinary || issocket) {
1562     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1568 {
1569   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1570   PetscErrorCode ierr;
1571   Vec            bb1 = 0;
1572   PetscBool      hasop;
1573 
1574   PetscFunctionBegin;
1575   if (flag == SOR_APPLY_UPPER) {
1576     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1577     PetscFunctionReturn(0);
1578   }
1579 
1580   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1581     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1582   }
1583 
1584   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1585     if (flag & SOR_ZERO_INITIAL_GUESS) {
1586       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1587       its--;
1588     }
1589 
1590     while (its--) {
1591       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1592       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1593 
1594       /* update rhs: bb1 = bb - B*x */
1595       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1596       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1597 
1598       /* local sweep */
1599       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1600     }
1601   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1602     if (flag & SOR_ZERO_INITIAL_GUESS) {
1603       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1604       its--;
1605     }
1606     while (its--) {
1607       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609 
1610       /* update rhs: bb1 = bb - B*x */
1611       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1612       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1613 
1614       /* local sweep */
1615       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1616     }
1617   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622     while (its--) {
1623       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625 
1626       /* update rhs: bb1 = bb - B*x */
1627       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1628       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1629 
1630       /* local sweep */
1631       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1632     }
1633   } else if (flag & SOR_EISENSTAT) {
1634     Vec xx1;
1635 
1636     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1637     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1638 
1639     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641     if (!mat->diag) {
1642       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1643       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1644     }
1645     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1646     if (hasop) {
1647       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1648     } else {
1649       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1650     }
1651     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1652 
1653     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1654 
1655     /* local sweep */
1656     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1657     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1658     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1659   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1660 
1661   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1662 
1663   matin->factorerrortype = mat->A->factorerrortype;
1664   PetscFunctionReturn(0);
1665 }
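
/*
   Usage sketch (illustrative): the local sweeps above are normally reached through
   PCSOR rather than called directly, e.g. on the command line

     -pc_type sor -pc_sor_local_symmetric

   or, assuming an assembled A and conforming vectors b and x, with a direct call:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/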
1666 
1667 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1668 {
1669   Mat            aA,aB,Aperm;
1670   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1671   PetscScalar    *aa,*ba;
1672   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1673   PetscSF        rowsf,sf;
1674   IS             parcolp = NULL;
1675   PetscBool      done;
1676   PetscErrorCode ierr;
1677 
1678   PetscFunctionBegin;
1679   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1680   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1681   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1682   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1683 
1684   /* Invert row permutation to find out where my rows should go */
1685   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1686   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1687   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1688   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1689   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1690   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1691 
1692   /* Invert column permutation to find out where my columns should go */
1693   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1694   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1695   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1696   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1697   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1698   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1699   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1700 
1701   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1702   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1703   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1704 
1705   /* Find out where my gcols should go */
1706   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1707   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1708   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1709   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1710   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1711   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1712   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1713   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1714 
1715   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1716   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1717   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1718   for (i=0; i<m; i++) {
1719     PetscInt    row = rdest[i];
1720     PetscMPIInt rowner;
1721     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1722     for (j=ai[i]; j<ai[i+1]; j++) {
1723       PetscInt    col = cdest[aj[j]];
1724       PetscMPIInt cowner;
1725       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1726       if (rowner == cowner) dnnz[i]++;
1727       else onnz[i]++;
1728     }
1729     for (j=bi[i]; j<bi[i+1]; j++) {
1730       PetscInt    col = gcdest[bj[j]];
1731       PetscMPIInt cowner;
1732       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1733       if (rowner == cowner) dnnz[i]++;
1734       else onnz[i]++;
1735     }
1736   }
1737   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1738   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1739   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1740   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1741   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1742 
1743   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1744   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1745   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1746   for (i=0; i<m; i++) {
1747     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1748     PetscInt j0,rowlen;
1749     rowlen = ai[i+1] - ai[i];
1750     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the scratch array length m, so insert the values in batches of at most m */
1751       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1752       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1753     }
1754     rowlen = bi[i+1] - bi[i];
1755     for (j0=j=0; j<rowlen; j0=j) {
1756       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1757       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1758     }
1759   }
1760   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1761   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1762   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1763   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1764   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1765   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1766   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1767   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1768   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1769   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1770   *B = Aperm;
1771   PetscFunctionReturn(0);
1772 }
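
/*
   Usage sketch (illustrative): applying a permutation, assuming rowp and colp are
   index sets describing the new global ordering (for example obtained from
   MatGetOrdering(), where an ordering is supported for this matrix type):

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
*/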
1773 
1774 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1775 {
1776   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1781   if (ghosts) *ghosts = aij->garray;
1782   PetscFunctionReturn(0);
1783 }
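
/*
   Usage sketch (illustrative): the ghost array returned below is the garray of the
   off-diagonal block and is owned by the matrix, so the caller must not free it.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/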
1784 
1785 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1786 {
1787   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1788   Mat            A    = mat->A,B = mat->B;
1789   PetscErrorCode ierr;
1790   PetscLogDouble isend[5],irecv[5];
1791 
1792   PetscFunctionBegin;
1793   info->block_size = 1.0;
1794   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1795 
1796   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1797   isend[3] = info->memory;  isend[4] = info->mallocs;
1798 
1799   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1800 
1801   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1802   isend[3] += info->memory;  isend[4] += info->mallocs;
1803   if (flag == MAT_LOCAL) {
1804     info->nz_used      = isend[0];
1805     info->nz_allocated = isend[1];
1806     info->nz_unneeded  = isend[2];
1807     info->memory       = isend[3];
1808     info->mallocs      = isend[4];
1809   } else if (flag == MAT_GLOBAL_MAX) {
1810     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1811 
1812     info->nz_used      = irecv[0];
1813     info->nz_allocated = irecv[1];
1814     info->nz_unneeded  = irecv[2];
1815     info->memory       = irecv[3];
1816     info->mallocs      = irecv[4];
1817   } else if (flag == MAT_GLOBAL_SUM) {
1818     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1819 
1820     info->nz_used      = irecv[0];
1821     info->nz_allocated = irecv[1];
1822     info->nz_unneeded  = irecv[2];
1823     info->memory       = irecv[3];
1824     info->mallocs      = irecv[4];
1825   }
1826   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1827   info->fill_ratio_needed = 0;
1828   info->factor_mallocs    = 0;
1829   PetscFunctionReturn(0);
1830 }
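
/*
   Usage sketch (illustrative): the MAT_GLOBAL_SUM branch above reduces the local
   counts over the communicator; note that the MatInfo fields are PetscLogDouble.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"total nonzeros %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/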
1831 
1832 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1833 {
1834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1835   PetscErrorCode ierr;
1836 
1837   PetscFunctionBegin;
1838   switch (op) {
1839   case MAT_NEW_NONZERO_LOCATIONS:
1840   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1841   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1842   case MAT_KEEP_NONZERO_PATTERN:
1843   case MAT_NEW_NONZERO_LOCATION_ERR:
1844   case MAT_USE_INODES:
1845   case MAT_IGNORE_ZERO_ENTRIES:
1846     MatCheckPreallocated(A,1);
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1849     break;
1850   case MAT_ROW_ORIENTED:
1851     MatCheckPreallocated(A,1);
1852     a->roworiented = flg;
1853 
1854     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1855     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1856     break;
1857   case MAT_NEW_DIAGONALS:
1858   case MAT_SORTED_FULL:
1859     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1860     break;
1861   case MAT_IGNORE_OFF_PROC_ENTRIES:
1862     a->donotstash = flg;
1863     break;
1864   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1865   case MAT_SPD:
1866   case MAT_SYMMETRIC:
1867   case MAT_STRUCTURALLY_SYMMETRIC:
1868   case MAT_HERMITIAN:
1869   case MAT_SYMMETRY_ETERNAL:
1870     break;
1871   case MAT_SUBMAT_SINGLEIS:
1872     A->submat_singleis = flg;
1873     break;
1874   case MAT_STRUCTURE_ONLY:
1875     /* The option is handled directly by MatSetOption() */
1876     break;
1877   default:
1878     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1879   }
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1884 {
1885   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1886   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1887   PetscErrorCode ierr;
1888   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1889   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1890   PetscInt       *cmap,*idx_p;
1891 
1892   PetscFunctionBegin;
1893   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1894   mat->getrowactive = PETSC_TRUE;
1895 
1896   if (!mat->rowvalues && (idx || v)) {
1897     /*
1898         allocate enough space to hold information from the longest row.
1899     */
1900     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1901     PetscInt   max = 1,tmp;
1902     for (i=0; i<matin->rmap->n; i++) {
1903       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1904       if (max < tmp) max = tmp;
1905     }
1906     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1907   }
1908 
1909   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1910   lrow = row - rstart;
1911 
1912   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1913   if (!v)   {pvA = 0; pvB = 0;}
1914   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1915   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1916   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1917   nztot = nzA + nzB;
1918 
1919   cmap = mat->garray;
1920   if (v  || idx) {
1921     if (nztot) {
1922       /* Sort by increasing column numbers, assuming A and B already sorted */
1923       PetscInt imark = -1;
1924       if (v) {
1925         *v = v_p = mat->rowvalues;
1926         for (i=0; i<nzB; i++) {
1927           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1928           else break;
1929         }
1930         imark = i;
1931         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1932         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1933       }
1934       if (idx) {
1935         *idx = idx_p = mat->rowindices;
1936         if (imark > -1) {
1937           for (i=0; i<imark; i++) {
1938             idx_p[i] = cmap[cworkB[i]];
1939           }
1940         } else {
1941           for (i=0; i<nzB; i++) {
1942             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1943             else break;
1944           }
1945           imark = i;
1946         }
1947         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1948         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1949       }
1950     } else {
1951       if (idx) *idx = 0;
1952       if (v)   *v   = 0;
1953     }
1954   }
1955   *nz  = nztot;
1956   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1957   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1962 {
1963   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1964 
1965   PetscFunctionBegin;
1966   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1967   aij->getrowactive = PETSC_FALSE;
1968   PetscFunctionReturn(0);
1969 }
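
/*
   Usage sketch (illustrative): MatGetRow() may only be asked for locally owned
   rows, and every call must be matched by MatRestoreRow() before the next row is
   requested, since the two routines above share the rowvalues/rowindices buffers.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/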
1970 
1971 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1972 {
1973   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1974   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1975   PetscErrorCode ierr;
1976   PetscInt       i,j,cstart = mat->cmap->rstart;
1977   PetscReal      sum = 0.0;
1978   MatScalar      *v;
1979 
1980   PetscFunctionBegin;
1981   if (aij->size == 1) {
1982     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1983   } else {
1984     if (type == NORM_FROBENIUS) {
1985       v = amat->a;
1986       for (i=0; i<amat->nz; i++) {
1987         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1988       }
1989       v = bmat->a;
1990       for (i=0; i<bmat->nz; i++) {
1991         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1992       }
1993       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1994       *norm = PetscSqrtReal(*norm);
1995       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1996     } else if (type == NORM_1) { /* max column norm */
1997       PetscReal *tmp,*tmp2;
1998       PetscInt  *jj,*garray = aij->garray;
1999       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
2000       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
2001       *norm = 0.0;
2002       v     = amat->a; jj = amat->j;
2003       for (j=0; j<amat->nz; j++) {
2004         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2005       }
2006       v = bmat->a; jj = bmat->j;
2007       for (j=0; j<bmat->nz; j++) {
2008         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2009       }
2010       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2011       for (j=0; j<mat->cmap->N; j++) {
2012         if (tmp2[j] > *norm) *norm = tmp2[j];
2013       }
2014       ierr = PetscFree(tmp);CHKERRQ(ierr);
2015       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2016       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2017     } else if (type == NORM_INFINITY) { /* max row norm */
2018       PetscReal ntemp = 0.0;
2019       for (j=0; j<aij->A->rmap->n; j++) {
2020         v   = amat->a + amat->i[j];
2021         sum = 0.0;
2022         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2023           sum += PetscAbsScalar(*v); v++;
2024         }
2025         v = bmat->a + bmat->i[j];
2026         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2027           sum += PetscAbsScalar(*v); v++;
2028         }
2029         if (sum > ntemp) ntemp = sum;
2030       }
2031       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2032       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2033     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2034   }
2035   PetscFunctionReturn(0);
2036 }
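
/*
   For reference (summarizing the branches above): NORM_FROBENIUS is
   sqrt(sum_ij |a_ij|^2), NORM_1 is the maximum column sum of absolute values, and
   NORM_INFINITY is the maximum row sum. Usage sketch (illustrative):

     PetscReal nrm;
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/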
2037 
2038 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2039 {
2040   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2041   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2042   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2043   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2044   PetscErrorCode  ierr;
2045   Mat             B,A_diag,*B_diag;
2046   const MatScalar *array;
2047 
2048   PetscFunctionBegin;
2049   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2050   ai = Aloc->i; aj = Aloc->j;
2051   bi = Bloc->i; bj = Bloc->j;
2052   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2053     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2054     PetscSFNode          *oloc;
2055     PETSC_UNUSED PetscSF sf;
2056 
2057     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2058     /* compute d_nnz for preallocation */
2059     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2060     for (i=0; i<ai[ma]; i++) {
2061       d_nnz[aj[i]]++;
2062     }
2063     /* compute local off-diagonal contributions */
2064     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2065     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2066     /* map those to global */
2067     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2068     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2069     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2070     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2071     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2072     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2073     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2074 
2075     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2076     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2077     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2078     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2079     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2080     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2081   } else {
2082     B    = *matout;
2083     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2084   }
2085 
2086   b           = (Mat_MPIAIJ*)B->data;
2087   A_diag      = a->A;
2088   B_diag      = &b->A;
2089   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2090   A_diag_ncol = A_diag->cmap->N;
2091   B_diag_ilen = sub_B_diag->ilen;
2092   B_diag_i    = sub_B_diag->i;
2093 
2094   /* Set ilen for diagonal of B */
2095   for (i=0; i<A_diag_ncol; i++) {
2096     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2097   }
2098 
2099   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2100      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2101   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2102 
2103   /* copy over the B part */
2104   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2105   array = Bloc->a;
2106   row   = A->rmap->rstart;
2107   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2108   cols_tmp = cols;
2109   for (i=0; i<mb; i++) {
2110     ncol = bi[i+1]-bi[i];
2111     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2112     row++;
2113     array += ncol; cols_tmp += ncol;
2114   }
2115   ierr = PetscFree(cols);CHKERRQ(ierr);
2116 
2117   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2118   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2119   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2120     *matout = B;
2121   } else {
2122     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2123   }
2124   PetscFunctionReturn(0);
2125 }
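
/*
   Usage sketch (illustrative): the routine above serves both out-of-place and
   reused transposes; MAT_REUSE_MATRIX requires a B previously produced by a
   MAT_INITIAL_MATRIX call, so that it already has the transposed nonzero pattern.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
*/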
2126 
2127 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2128 {
2129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2130   Mat            a    = aij->A,b = aij->B;
2131   PetscErrorCode ierr;
2132   PetscInt       s1,s2,s3;
2133 
2134   PetscFunctionBegin;
2135   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2136   if (rr) {
2137     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2138     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2139     /* Overlap communication with computation. */
2140     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2141   }
2142   if (ll) {
2143     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2144     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2145     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2146   }
2147   /* scale  the diagonal block */
2148   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2149 
2150   if (rr) {
2151     /* Do a scatter end and then right scale the off-diagonal block */
2152     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2153     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2154   }
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2159 {
2160   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2161   PetscErrorCode ierr;
2162 
2163   PetscFunctionBegin;
2164   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2169 {
2170   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2171   Mat            a,b,c,d;
2172   PetscBool      flg;
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   a = matA->A; b = matA->B;
2177   c = matB->A; d = matB->B;
2178 
2179   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2180   if (flg) {
2181     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2182   }
2183   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2188 {
2189   PetscErrorCode ierr;
2190   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2191   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2192 
2193   PetscFunctionBegin;
2194   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2195   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2196     /* because of the column compression in the off-processor part of the matrix a->B,
2197        the number of columns in a->B and b->B may be different, hence we cannot call
2198        MatCopy() directly on the two parts. If need be, we can provide a more
2199        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2200        then copying the submatrices */
2201     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2202   } else {
2203     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2204     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2205   }
2206   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2211 {
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 /*
2220    Computes the number of nonzeros per row needed for preallocation when X and Y
2221    have different nonzero structure.
2222 */
2223 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2224 {
2225   PetscInt       i,j,k,nzx,nzy;
2226 
2227   PetscFunctionBegin;
2228   /* Set the number of nonzeros in the new matrix */
2229   for (i=0; i<m; i++) {
2230     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2231     nzx = xi[i+1] - xi[i];
2232     nzy = yi[i+1] - yi[i];
2233     nnz[i] = 0;
2234     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2235       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2236       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2237       nnz[i]++;
2238     }
2239     for (; k<nzy; k++) nnz[i]++;
2240   }
2241   PetscFunctionReturn(0);
2242 }
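
/*
   Worked example of the merge count above (illustrative): if row i of X has global
   columns {0,5} and row i of Y has global columns {3,5,7}, the two sorted lists
   merge to the union {0,3,5,7}; the shared column 5 is counted once (the k++
   skips the duplicate), giving nnz[i] = 4.
*/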
2243 
2244 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2245 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2246 {
2247   PetscErrorCode ierr;
2248   PetscInt       m = Y->rmap->N;
2249   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2250   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2251 
2252   PetscFunctionBegin;
2253   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2258 {
2259   PetscErrorCode ierr;
2260   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2261   PetscBLASInt   bnz,one=1;
2262   Mat_SeqAIJ     *x,*y;
2263 
2264   PetscFunctionBegin;
2265   if (str == SAME_NONZERO_PATTERN) {
2266     PetscScalar alpha = a;
2267     x    = (Mat_SeqAIJ*)xx->A->data;
2268     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2269     y    = (Mat_SeqAIJ*)yy->A->data;
2270     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2271     x    = (Mat_SeqAIJ*)xx->B->data;
2272     y    = (Mat_SeqAIJ*)yy->B->data;
2273     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2274     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2275     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2276     /* the MatAXPY_Basic* subroutines call MatAssembly, so the matrix on the GPU
2277        will be updated */
2278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2279     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2280       Y->offloadmask = PETSC_OFFLOAD_CPU;
2281     }
2282 #endif
2283   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2284     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2285   } else {
2286     Mat      B;
2287     PetscInt *nnz_d,*nnz_o;
2288     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2289     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2290     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2291     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2292     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2293     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2294     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2295     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2296     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2297     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2298     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2299     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2300     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2301     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2302   }
2303   PetscFunctionReturn(0);
2304 }
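
/*
   Usage sketch (illustrative): computing Y += a*X; the MatStructure argument
   selects the branch above. SAME_NONZERO_PATTERN permits the fast BLAS path,
   while DIFFERENT_NONZERO_PATTERN triggers the preallocate-and-rebuild path.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/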
2305 
2306 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2307 
2308 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2309 {
2310 #if defined(PETSC_USE_COMPLEX)
2311   PetscErrorCode ierr;
2312   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2316   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2317 #else
2318   PetscFunctionBegin;
2319 #endif
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2324 {
2325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2326   PetscErrorCode ierr;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2330   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2335 {
2336   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2337   PetscErrorCode ierr;
2338 
2339   PetscFunctionBegin;
2340   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2341   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2346 {
2347   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2348   PetscErrorCode ierr;
2349   PetscInt       i,*idxb = 0;
2350   PetscScalar    *va,*vb;
2351   Vec            vtmp;
2352 
2353   PetscFunctionBegin;
2354   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2355   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2356   if (idx) {
2357     for (i=0; i<A->rmap->n; i++) {
2358       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2359     }
2360   }
2361 
2362   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2363   if (idx) {
2364     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2365   }
2366   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2367   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2368 
2369   for (i=0; i<A->rmap->n; i++) {
2370     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2371       va[i] = vb[i];
2372       if (idx) idx[i] = a->garray[idxb[i]];
2373     }
2374   }
2375 
2376   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2378   ierr = PetscFree(idxb);CHKERRQ(ierr);
2379   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2384 {
2385   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2386   PetscErrorCode ierr;
2387   PetscInt       i,*idxb = 0;
2388   PetscScalar    *va,*vb;
2389   Vec            vtmp;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2394   if (idx) {
2395     for (i=0; i<A->rmap->n; i++) { /* v and idx have one entry per local row */
2396       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2397     }
2398   }
2399 
2400   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2401   if (idx) {
2402     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2403   }
2404   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2405   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2406 
2407   for (i=0; i<A->rmap->n; i++) {
2408     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2409       va[i] = vb[i];
2410       if (idx) idx[i] = a->garray[idxb[i]];
2411     }
2412   }
2413 
2414   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2415   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2416   ierr = PetscFree(idxb);CHKERRQ(ierr);
2417   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2422 {
2423   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2424   PetscInt       n      = A->rmap->n;
2425   PetscInt       cstart = A->cmap->rstart;
2426   PetscInt       *cmap  = mat->garray;
2427   PetscInt       *diagIdx, *offdiagIdx;
2428   Vec            diagV, offdiagV;
2429   PetscScalar    *a, *diagA, *offdiagA;
2430   PetscInt       r;
2431   PetscErrorCode ierr;
2432 
2433   PetscFunctionBegin;
2434   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2436   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2437   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2438   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2439   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2440   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2441   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2442   for (r = 0; r < n; ++r) {
2443     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2444       a[r]   = diagA[r];
2445       idx[r] = cstart + diagIdx[r];
2446     } else {
2447       a[r]   = offdiagA[r];
2448       idx[r] = cmap[offdiagIdx[r]];
2449     }
2450   }
2451   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2452   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2453   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2454   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2455   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2456   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2461 {
2462   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2463   PetscInt       n      = A->rmap->n;
2464   PetscInt       cstart = A->cmap->rstart;
2465   PetscInt       *cmap  = mat->garray;
2466   PetscInt       *diagIdx, *offdiagIdx;
2467   Vec            diagV, offdiagV;
2468   PetscScalar    *a, *diagA, *offdiagA;
2469   PetscInt       r;
2470   PetscErrorCode ierr;
2471 
2472   PetscFunctionBegin;
2473   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2474   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2475   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2476   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2477   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2478   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2479   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2480   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2481   for (r = 0; r < n; ++r) {
2482     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2483       a[r]   = diagA[r];
2484       idx[r] = cstart + diagIdx[r];
2485     } else {
2486       a[r]   = offdiagA[r];
2487       idx[r] = cmap[offdiagIdx[r]];
2488     }
2489   }
2490   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2491   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2492   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2493   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2494   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2495   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2500 {
2501   PetscErrorCode ierr;
2502   Mat            *dummy;
2503 
2504   PetscFunctionBegin;
2505   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2506   *newmat = *dummy;
2507   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2512 {
2513   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2514   PetscErrorCode ierr;
2515 
2516   PetscFunctionBegin;
2517   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2518   A->factorerrortype = a->A->factorerrortype;
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2523 {
2524   PetscErrorCode ierr;
2525   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2526 
2527   PetscFunctionBegin;
2528   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2529   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2530   if (x->assembled) {
2531     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2532   } else {
2533     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2534   }
2535   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2536   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2541 {
2542   PetscFunctionBegin;
2543   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2544   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 /*@
2549    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2550 
2551    Collective on Mat
2552 
2553    Input Parameters:
2554 +    A - the matrix
2555 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (by default it is not used)
2556 
2557    Level: advanced
2558 
2559 @*/
2560 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2561 {
2562   PetscErrorCode       ierr;
2563 
2564   PetscFunctionBegin;
2565   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2566   PetscFunctionReturn(0);
2567 }
2568 
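/*
   A minimal usage sketch (assuming A is an existing MATMPIAIJ matrix):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   The same switch is exposed in the options database as -mat_increase_overlap_scalable;
   see MatSetFromOptions_MPIAIJ() below.
*/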
2569 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2570 {
2571   PetscErrorCode       ierr;
2572   PetscBool            sc = PETSC_FALSE,flg;
2573 
2574   PetscFunctionBegin;
2575   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2576   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2577   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2578   if (flg) {
2579     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2580   }
2581   ierr = PetscOptionsTail();CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2586 {
2587   PetscErrorCode ierr;
2588   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2589   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2590 
2591   PetscFunctionBegin;
2592   if (!Y->preallocated) {
2593     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2594   } else if (!aij->nz) {
2595     PetscInt nonew = aij->nonew;
2596     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2597     aij->nonew = nonew;
2598   }
2599   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2600   PetscFunctionReturn(0);
2601 }
2602 
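/*
   MatShift(Y,a) computes Y = Y + a*I. The preallocation fix-up above ensures that a
   diagonal entry can be inserted even when Y holds no nonzeros yet, so for example

     ierr = MatShift(Y,2.0);CHKERRQ(ierr);   /* adds 2.0 to every diagonal entry of Y */

   works on a freshly created, empty MATMPIAIJ matrix.
*/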
2603 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2604 {
2605   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2606   PetscErrorCode ierr;
2607 
2608   PetscFunctionBegin;
2609   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2610   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2611   if (d) {
2612     PetscInt rstart;
2613     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2614     *d += rstart;
2615 
2616   }
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2621 {
2622   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2623   PetscErrorCode ierr;
2624 
2625   PetscFunctionBegin;
2626   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 /* -------------------------------------------------------------------*/
2631 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2632                                        MatGetRow_MPIAIJ,
2633                                        MatRestoreRow_MPIAIJ,
2634                                        MatMult_MPIAIJ,
2635                                 /* 4*/ MatMultAdd_MPIAIJ,
2636                                        MatMultTranspose_MPIAIJ,
2637                                        MatMultTransposeAdd_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                 /*10*/ 0,
2642                                        0,
2643                                        0,
2644                                        MatSOR_MPIAIJ,
2645                                        MatTranspose_MPIAIJ,
2646                                 /*15*/ MatGetInfo_MPIAIJ,
2647                                        MatEqual_MPIAIJ,
2648                                        MatGetDiagonal_MPIAIJ,
2649                                        MatDiagonalScale_MPIAIJ,
2650                                        MatNorm_MPIAIJ,
2651                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2652                                        MatAssemblyEnd_MPIAIJ,
2653                                        MatSetOption_MPIAIJ,
2654                                        MatZeroEntries_MPIAIJ,
2655                                 /*24*/ MatZeroRows_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*29*/ MatSetUp_MPIAIJ,
2661                                        0,
2662                                        0,
2663                                        MatGetDiagonalBlock_MPIAIJ,
2664                                        0,
2665                                 /*34*/ MatDuplicate_MPIAIJ,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                 /*39*/ MatAXPY_MPIAIJ,
2671                                        MatCreateSubMatrices_MPIAIJ,
2672                                        MatIncreaseOverlap_MPIAIJ,
2673                                        MatGetValues_MPIAIJ,
2674                                        MatCopy_MPIAIJ,
2675                                 /*44*/ MatGetRowMax_MPIAIJ,
2676                                        MatScale_MPIAIJ,
2677                                        MatShift_MPIAIJ,
2678                                        MatDiagonalSet_MPIAIJ,
2679                                        MatZeroRowsColumns_MPIAIJ,
2680                                 /*49*/ MatSetRandom_MPIAIJ,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2686                                        0,
2687                                        MatSetUnfactored_MPIAIJ,
2688                                        MatPermute_MPIAIJ,
2689                                        0,
2690                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2691                                        MatDestroy_MPIAIJ,
2692                                        MatView_MPIAIJ,
2693                                        0,
2694                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2695                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2696                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2701                                        MatGetRowMinAbs_MPIAIJ,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                 /*75*/ MatFDColoringApply_AIJ,
2707                                        MatSetFromOptions_MPIAIJ,
2708                                        0,
2709                                        0,
2710                                        MatFindZeroDiagonals_MPIAIJ,
2711                                 /*80*/ 0,
2712                                        0,
2713                                        0,
2714                                 /*83*/ MatLoad_MPIAIJ,
2715                                        MatIsSymmetric_MPIAIJ,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2721                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2722                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2723                                        MatPtAP_MPIAIJ_MPIAIJ,
2724                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2725                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                        MatPinToCPU_MPIAIJ,
2730                                 /*99*/ 0,
2731                                        0,
2732                                        0,
2733                                        MatConjugate_MPIAIJ,
2734                                        0,
2735                                 /*104*/MatSetValuesRow_MPIAIJ,
2736                                        MatRealPart_MPIAIJ,
2737                                        MatImaginaryPart_MPIAIJ,
2738                                        0,
2739                                        0,
2740                                 /*109*/0,
2741                                        0,
2742                                        MatGetRowMin_MPIAIJ,
2743                                        0,
2744                                        MatMissingDiagonal_MPIAIJ,
2745                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2746                                        0,
2747                                        MatGetGhosts_MPIAIJ,
2748                                        0,
2749                                        0,
2750                                 /*119*/0,
2751                                        0,
2752                                        0,
2753                                        0,
2754                                        MatGetMultiProcBlock_MPIAIJ,
2755                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2756                                        MatGetColumnNorms_MPIAIJ,
2757                                        MatInvertBlockDiagonal_MPIAIJ,
2758                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2759                                        MatCreateSubMatricesMPI_MPIAIJ,
2760                                 /*129*/0,
2761                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2762                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2763                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2764                                        0,
2765                                 /*134*/0,
2766                                        0,
2767                                        MatRARt_MPIAIJ_MPIAIJ,
2768                                        0,
2769                                        0,
2770                                 /*139*/MatSetBlockSizes_MPIAIJ,
2771                                        0,
2772                                        0,
2773                                        MatFDColoringSetUp_MPIXAIJ,
2774                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2775                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2776 };
2777 
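/* A zero entry in the table above means that MATMPIAIJ does not supply its own
   implementation for that slot; depending on the operation, PETSc then either uses a
   generic fallback or raises a "no support for this operation" error. */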
2778 /* ----------------------------------------------------------------------------------------*/
2779 
2780 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2781 {
2782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2783   PetscErrorCode ierr;
2784 
2785   PetscFunctionBegin;
2786   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2787   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2788   PetscFunctionReturn(0);
2789 }
2790 
2791 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2792 {
2793   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2794   PetscErrorCode ierr;
2795 
2796   PetscFunctionBegin;
2797   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2798   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2803 {
2804   Mat_MPIAIJ     *b;
2805   PetscErrorCode ierr;
2806   PetscMPIInt    size;
2807 
2808   PetscFunctionBegin;
2809   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2810   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2811   b = (Mat_MPIAIJ*)B->data;
2812 
2813 #if defined(PETSC_USE_CTABLE)
2814   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2815 #else
2816   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2817 #endif
2818   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2819   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2820   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2821 
2822   /* Because B will have been resized we simply destroy it and create a new one each time */
2823   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2824   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2825   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2826   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2827   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2828   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2829   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2830 
2831   if (!B->preallocated) {
2832     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2833     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2834     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2835     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2836     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2837   }
2838 
2839   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2840   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2841   B->preallocated  = PETSC_TRUE;
2842   B->was_assembled = PETSC_FALSE;
2843   B->assembled     = PETSC_FALSE;
2844   PetscFunctionReturn(0);
2845 }
2846 
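/*
   A minimal preallocation sketch (hypothetical counts; B is a MATMPIAIJ matrix whose
   sizes have already been set with MatSetSizes()):

     ierr = MatMPIAIJSetPreallocation(B,3,NULL,2,NULL);CHKERRQ(ierr);

   This reserves room for 3 nonzeros per row in the diagonal block and 2 per row in the
   off-diagonal block; exact per-row counts can be supplied through d_nnz and o_nnz instead.
*/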
2847 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2848 {
2849   Mat_MPIAIJ     *b;
2850   PetscErrorCode ierr;
2851 
2852   PetscFunctionBegin;
2853   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2854   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2855   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2856   b = (Mat_MPIAIJ*)B->data;
2857 
2858 #if defined(PETSC_USE_CTABLE)
2859   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2860 #else
2861   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2862 #endif
2863   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2864   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2865   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2866 
2867   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2868   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2869   B->preallocated  = PETSC_TRUE;
2870   B->was_assembled = PETSC_FALSE;
2871   B->assembled = PETSC_FALSE;
2872   PetscFunctionReturn(0);
2873 }
2874 
2875 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2876 {
2877   Mat            mat;
2878   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2879   PetscErrorCode ierr;
2880 
2881   PetscFunctionBegin;
2882   *newmat = 0;
2883   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2884   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2885   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2886   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2887   a       = (Mat_MPIAIJ*)mat->data;
2888 
2889   mat->factortype   = matin->factortype;
2890   mat->assembled    = PETSC_TRUE;
2891   mat->insertmode   = NOT_SET_VALUES;
2892   mat->preallocated = PETSC_TRUE;
2893 
2894   a->size         = oldmat->size;
2895   a->rank         = oldmat->rank;
2896   a->donotstash   = oldmat->donotstash;
2897   a->roworiented  = oldmat->roworiented;
2898   a->rowindices   = 0;
2899   a->rowvalues    = 0;
2900   a->getrowactive = PETSC_FALSE;
2901 
2902   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2903   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2904 
2905   if (oldmat->colmap) {
2906 #if defined(PETSC_USE_CTABLE)
2907     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2908 #else
2909     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2910     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2911     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2912 #endif
2913   } else a->colmap = 0;
2914   if (oldmat->garray) {
2915     PetscInt len;
2916     len  = oldmat->B->cmap->n;
2917     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2918     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2919     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2920   } else a->garray = 0;
2921 
2922   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2923   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2924   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2925   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2926 
2927   if (oldmat->Mvctx_mpi1) {
2928     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2929     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2930   }
2931 
2932   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2933   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2934   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2935   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2936   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2937   *newmat = mat;
2938   PetscFunctionReturn(0);
2939 }
2940 
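/*
   Usage sketch for the duplication implemented above (A is an assembled MATMPIAIJ matrix):

     Mat B;

     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);   /* or MAT_DO_NOT_COPY_VALUES / MAT_SHARE_NONZERO_PATTERN */
*/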
2941 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2942 {
2943   PetscBool      isbinary, ishdf5;
2944   PetscErrorCode ierr;
2945 
2946   PetscFunctionBegin;
2947   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2948   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2949   /* force binary viewer to load .info file if it has not yet done so */
2950   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2951   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2953   if (isbinary) {
2954     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2955   } else if (ishdf5) {
2956 #if defined(PETSC_HAVE_HDF5)
2957     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2958 #else
2959     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2960 #endif
2961   } else {
2962     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2963   }
2964   PetscFunctionReturn(0);
2965 }
2966 
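/*
   A minimal load sketch (hypothetical file name; the file must have been written with
   MatView() on a binary viewer):

     PetscViewer viewer;
     Mat         A;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/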
2967 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2968 {
2969   PetscScalar    *vals,*svals;
2970   MPI_Comm       comm;
2971   PetscErrorCode ierr;
2972   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2973   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2974   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2975   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2976   PetscInt       cend,cstart,n,*rowners;
2977   int            fd;
2978   PetscInt       bs = newMat->rmap->bs;
2979 
2980   PetscFunctionBegin;
2981   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2982   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2983   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2984   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2985   if (!rank) {
2986     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2987     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2988     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2989   }
2990 
2991   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2992   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2993   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2994   if (bs < 0) bs = 1;
2995 
2996   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2997   M    = header[1]; N = header[2];
2998 
2999   /* If global sizes are set, check if they are consistent with that given in the file */
3000   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
3001   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
3002 
3003   /* determine ownership of all (block) rows */
3004   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
3005   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3006   else m = newMat->rmap->n; /* Set by user */
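  /* Worked example (hypothetical): M = 10, bs = 1 and size = 3 gives m = 4 on rank 0 and
     m = 3 on ranks 1 and 2, since 10/3 = 3 with the remainder of 1 going to the lowest rank */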
3007 
3008   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
3009   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3010 
3011   /* The first process needs enough room for the process with the most rows */
3012   if (!rank) {
3013     mmax = rowners[1];
3014     for (i=2; i<=size; i++) {
3015       mmax = PetscMax(mmax, rowners[i]);
3016     }
3017   } else mmax = -1;             /* unused, but compilers complain */
3018 
3019   rowners[0] = 0;
3020   for (i=2; i<=size; i++) {
3021     rowners[i] += rowners[i-1];
3022   }
3023   rstart = rowners[rank];
3024   rend   = rowners[rank+1];
3025 
3026   /* distribute row lengths to all processors */
3027   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3028   if (!rank) {
3029     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3030     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3031     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3032     for (j=0; j<m; j++) {
3033       procsnz[0] += ourlens[j];
3034     }
3035     for (i=1; i<size; i++) {
3036       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3037       /* calculate the number of nonzeros on each processor */
3038       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3039         procsnz[i] += rowlengths[j];
3040       }
3041       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3042     }
3043     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3044   } else {
3045     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3046   }
3047 
3048   if (!rank) {
3049     /* determine max buffer needed and allocate it */
3050     maxnz = 0;
3051     for (i=0; i<size; i++) {
3052       maxnz = PetscMax(maxnz,procsnz[i]);
3053     }
3054     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3055 
3056     /* read in my part of the matrix column indices  */
3057     nz   = procsnz[0];
3058     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3059     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3060 
3061     /* read in everyone else's parts and ship them off */
3062     for (i=1; i<size; i++) {
3063       nz   = procsnz[i];
3064       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3065       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3066     }
3067     ierr = PetscFree(cols);CHKERRQ(ierr);
3068   } else {
3069     /* determine buffer space needed for message */
3070     nz = 0;
3071     for (i=0; i<m; i++) {
3072       nz += ourlens[i];
3073     }
3074     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3075 
3076     /* receive message of column indices */
3077     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3078   }
3079 
3080   /* determine column ownership if matrix is not square */
3081   if (N != M) {
3082     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3083     else n = newMat->cmap->n;
3084     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3085     cstart = cend - n;
3086   } else {
3087     cstart = rstart;
3088     cend   = rend;
3089     n      = cend - cstart;
3090   }
3091 
3092   /* loop over local rows, determining number of off diagonal entries */
3093   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3094   jj   = 0;
3095   for (i=0; i<m; i++) {
3096     for (j=0; j<ourlens[i]; j++) {
3097       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3098       jj++;
3099     }
3100   }
3101 
3102   for (i=0; i<m; i++) {
3103     ourlens[i] -= offlens[i];
3104   }
3105   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3106 
3107   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3108 
3109   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3110 
3111   for (i=0; i<m; i++) {
3112     ourlens[i] += offlens[i];
3113   }
3114 
3115   if (!rank) {
3116     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3117 
3118     /* read in my part of the matrix numerical values  */
3119     nz   = procsnz[0];
3120     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3121 
3122     /* insert into matrix */
3123     jj      = rstart;
3124     smycols = mycols;
3125     svals   = vals;
3126     for (i=0; i<m; i++) {
3127       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3128       smycols += ourlens[i];
3129       svals   += ourlens[i];
3130       jj++;
3131     }
3132 
3133     /* read in other processors and ship out */
3134     for (i=1; i<size; i++) {
3135       nz   = procsnz[i];
3136       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3137       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3138     }
3139     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3140   } else {
3141     /* receive numeric values */
3142     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3143 
3144     /* receive message of values */
3145     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3146 
3147     /* insert into matrix */
3148     jj      = rstart;
3149     smycols = mycols;
3150     svals   = vals;
3151     for (i=0; i<m; i++) {
3152       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3153       smycols += ourlens[i];
3154       svals   += ourlens[i];
3155       jj++;
3156     }
3157   }
3158   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3159   ierr = PetscFree(vals);CHKERRQ(ierr);
3160   ierr = PetscFree(mycols);CHKERRQ(ierr);
3161   ierr = PetscFree(rowners);CHKERRQ(ierr);
3162   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3163   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3164   PetscFunctionReturn(0);
3165 }
3166 
3167 /* Not scalable because of ISAllGather() unless getting all columns. */
3168 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3169 {
3170   PetscErrorCode ierr;
3171   IS             iscol_local;
3172   PetscBool      isstride;
3173   PetscMPIInt    lisstride=0,gisstride;
3174 
3175   PetscFunctionBegin;
3176   /* check if we are grabbing all columns */
3177   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3178 
3179   if (isstride) {
3180     PetscInt  start,len,mstart,mlen;
3181     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3182     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3183     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3184     if (mstart == start && mlen-mstart == len) lisstride = 1;
3185   }
3186 
3187   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3188   if (gisstride) {
3189     PetscInt N;
3190     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3191     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3192     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3193     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3194   } else {
3195     PetscInt cbs;
3196     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3197     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3198     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3199   }
3200 
3201   *isseq = iscol_local;
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 /*
3206  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3207  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3208 
3209  Input Parameters:
3210    mat - matrix
3211    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3212            i.e., mat->rstart <= isrow[i] < mat->rend
3213    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3214            i.e., mat->cstart <= iscol[i] < mat->cend
3215  Output Parameters:
3216    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3217    iscol_o - sequential column index set for retrieving mat->B
3218    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3219  */
3220 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3221 {
3222   PetscErrorCode ierr;
3223   Vec            x,cmap;
3224   const PetscInt *is_idx;
3225   PetscScalar    *xarray,*cmaparray;
3226   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3227   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3228   Mat            B=a->B;
3229   Vec            lvec=a->lvec,lcmap;
3230   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3231   MPI_Comm       comm;
3232   VecScatter     Mvctx=a->Mvctx;
3233 
3234   PetscFunctionBegin;
3235   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3236   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3237 
3238   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3239   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3240   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3241   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3242   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3243 
3244   /* Get start indices */
3245   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3246   isstart -= ncols;
3247   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3248 
3249   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3250   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3251   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3252   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3253   for (i=0; i<ncols; i++) {
3254     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3255     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3256     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3257   }
3258   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3259   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3260   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3261 
3262   /* Get iscol_d */
3263   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3264   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3265   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3266 
3267   /* Get isrow_d */
3268   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3269   rstart = mat->rmap->rstart;
3270   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3271   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3272   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3273   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3274 
3275   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3276   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3277   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3278 
3279   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3280   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3281   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3282 
3283   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3284 
3285   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3286   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3287 
3288   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3289   /* off-process column indices */
3290   count = 0;
3291   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3292   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3293 
3294   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3295   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3296   for (i=0; i<Bn; i++) {
3297     if (PetscRealPart(xarray[i]) > -1.0) {
3298       idx[count]     = i;                   /* local column index in off-diagonal part B */
3299       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3300       count++;
3301     }
3302   }
3303   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3304   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3305 
3306   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3307   /* cannot ensure iscol_o has same blocksize as iscol! */
3308 
3309   ierr = PetscFree(idx);CHKERRQ(ierr);
3310   *garray = cmap1;
3311 
3312   ierr = VecDestroy(&x);CHKERRQ(ierr);
3313   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3314   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3315   PetscFunctionReturn(0);
3316 }
3317 
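/*
   Illustration of the garray output above (hypothetical): if iscol globally selects matrix
   columns {2,5,8,11} and the off-diagonal block B of this process holds global columns 5 and
   11, then iscol_o selects the corresponding two local columns of B and garray = {1,3}, the
   positions of those columns within iscol, i.e. their column indices in the submatrix.
*/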
3318 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3319 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3320 {
3321   PetscErrorCode ierr;
3322   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3323   Mat            M = NULL;
3324   MPI_Comm       comm;
3325   IS             iscol_d,isrow_d,iscol_o;
3326   Mat            Asub = NULL,Bsub = NULL;
3327   PetscInt       n;
3328 
3329   PetscFunctionBegin;
3330   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3331 
3332   if (call == MAT_REUSE_MATRIX) {
3333     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3334     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3335     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3336 
3337     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3338     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3339 
3340     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3341     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3342 
3343     /* Update diagonal and off-diagonal portions of submat */
3344     asub = (Mat_MPIAIJ*)(*submat)->data;
3345     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3346     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3347     if (n) {
3348       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3349     }
3350     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3351     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3352 
3353   } else { /* call == MAT_INITIAL_MATRIX */
3354     const PetscInt *garray;
3355     PetscInt        BsubN;
3356 
3357     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3358     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3359 
3360     /* Create local submatrices Asub and Bsub */
3361     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3362     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3363 
3364     /* Create submatrix M */
3365     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3366 
3367     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3368     asub = (Mat_MPIAIJ*)M->data;
3369 
3370     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3371     n = asub->B->cmap->N;
3372     if (BsubN > n) {
3373       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3374       const PetscInt *idx;
3375       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3376       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3377 
3378       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3379       j = 0;
3380       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3381       for (i=0; i<n; i++) {
3382         if (j >= BsubN) break;
3383         while (subgarray[i] > garray[j]) j++;
3384 
3385         if (subgarray[i] == garray[j]) {
3386           idx_new[i] = idx[j++];
3387       } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3388       }
3389       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3390 
3391       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3392       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3393 
3394     } else if (BsubN < n) {
3395       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than those of B (%D)",BsubN,asub->B->cmap->N);
3396     }
3397 
3398     ierr = PetscFree(garray);CHKERRQ(ierr);
3399     *submat = M;
3400 
3401     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3402     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3403     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3404 
3405     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3406     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3407 
3408     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3409     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3410   }
3411   PetscFunctionReturn(0);
3412 }
3413 
3414 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3415 {
3416   PetscErrorCode ierr;
3417   IS             iscol_local=NULL,isrow_d;
3418   PetscInt       csize;
3419   PetscInt       n,i,j,start,end;
3420   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3421   MPI_Comm       comm;
3422 
3423   PetscFunctionBegin;
3424   /* If isrow has same processor distribution as mat,
3425      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3426   if (call == MAT_REUSE_MATRIX) {
3427     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3428     if (isrow_d) {
3429       sameRowDist  = PETSC_TRUE;
3430       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3431     } else {
3432       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3433       if (iscol_local) {
3434         sameRowDist  = PETSC_TRUE;
3435         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3436       }
3437     }
3438   } else {
3439     /* Check if isrow has same processor distribution as mat */
3440     sameDist[0] = PETSC_FALSE;
3441     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3442     if (!n) {
3443       sameDist[0] = PETSC_TRUE;
3444     } else {
3445       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3446       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3447       if (i >= start && j < end) {
3448         sameDist[0] = PETSC_TRUE;
3449       }
3450     }
3451 
3452     /* Check if iscol has same processor distribution as mat */
3453     sameDist[1] = PETSC_FALSE;
3454     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3455     if (!n) {
3456       sameDist[1] = PETSC_TRUE;
3457     } else {
3458       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3459       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3460       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3461     }
3462 
3463     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3464     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3465     sameRowDist = tsameDist[0];
3466   }
3467 
3468   if (sameRowDist) {
3469     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3470       /* isrow and iscol have same processor distribution as mat */
3471       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3472       PetscFunctionReturn(0);
3473     } else { /* sameRowDist */
3474       /* isrow has same processor distribution as mat */
3475       if (call == MAT_INITIAL_MATRIX) {
3476         PetscBool sorted;
3477         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3478         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3479         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3480         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3481 
3482         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3483         if (sorted) {
3484           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3485           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3486           PetscFunctionReturn(0);
3487         }
3488       } else { /* call == MAT_REUSE_MATRIX */
3489         IS    iscol_sub;
3490         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3491         if (iscol_sub) {
3492           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3493           PetscFunctionReturn(0);
3494         }
3495       }
3496     }
3497   }
3498 
3499   /* General case: iscol -> iscol_local which has global size of iscol */
3500   if (call == MAT_REUSE_MATRIX) {
3501     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3502     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3503   } else {
3504     if (!iscol_local) {
3505       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3506     }
3507   }
3508 
3509   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3510   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3511 
3512   if (call == MAT_INITIAL_MATRIX) {
3513     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3514     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3515   }
3516   PetscFunctionReturn(0);
3517 }
3518 
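/*
   A usage sketch of the dispatch implemented above (isrow and iscol are hypothetical
   parallel index sets selecting rows and columns of A):

     Mat S;

     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ... modify the numerical values of A, keeping its nonzero pattern ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call relies on the objects ("isrow_d", "SubIScol", "ISAllGather", ...)
   composed with S by the MAT_INITIAL_MATRIX call.
*/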
3519 /*@C
3520      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3521          and "off-diagonal" parts of the matrix in CSR format.
3522 
3523    Collective
3524 
3525    Input Parameters:
3526 +  comm - MPI communicator
3527 .  A - "diagonal" portion of matrix
3528 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3529 -  garray - global index of B columns
3530 
3531    Output Parameter:
3532 .   mat - the matrix, with input A as its local diagonal matrix
3533 
3534    Notes:
3535        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
3536        A becomes part of the output mat and B is destroyed by this routine; the user cannot use A or B afterwards.
3537 
3538    Level: advanced
3539 .seealso: MatCreateMPIAIJWithSplitArrays()
3540 @*/
3541 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3542 {
3543   PetscErrorCode ierr;
3544   Mat_MPIAIJ     *maij;
3545   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3546   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3547   PetscScalar    *oa=b->a;
3548   Mat            Bnew;
3549   PetscInt       m,n,N;
3550 
3551   PetscFunctionBegin;
3552   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3553   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3554   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3555   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3556   /* The check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3557   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3558 
3559   /* Get global columns of mat */
3560   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3561 
3562   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3563   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3564   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3565   maij = (Mat_MPIAIJ*)(*mat)->data;
3566 
3567   (*mat)->preallocated = PETSC_TRUE;
3568 
3569   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3570   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3571 
3572   /* Set A as diagonal portion of *mat */
3573   maij->A = A;
3574 
3575   nz = oi[m];
3576   for (i=0; i<nz; i++) {
3577     col   = oj[i];
3578     oj[i] = garray[col];
3579   }
3580 
3581   /* Set Bnew as the off-diagonal portion of *mat */
3582   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3583   bnew        = (Mat_SeqAIJ*)Bnew->data;
3584   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3585   maij->B     = Bnew;
3586 
3587   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3588 
3589   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3590   b->free_a       = PETSC_FALSE;
3591   b->free_ij      = PETSC_FALSE;
3592   ierr = MatDestroy(&B);CHKERRQ(ierr);
3593 
3594   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3595   bnew->free_a       = PETSC_TRUE;
3596   bnew->free_ij      = PETSC_TRUE;
3597 
3598   /* condense columns of maij->B */
3599   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3600   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3601   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3602   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3603   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3604   PetscFunctionReturn(0);
3605 }
3606 
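/*
   A minimal sketch of the calling convention above (hypothetical: A is an m x n SeqAIJ,
   B is an m x k SeqAIJ holding the coupling to off-process columns, and garray[0..k-1]
   lists, in ascending order, the global column of each column of B):

     Mat C;

     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);CHKERRQ(ierr);

   A and B are absorbed/destroyed by the call and must not be used afterwards.
*/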
3607 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3608 
3609 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3610 {
3611   PetscErrorCode ierr;
3612   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3613   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3614   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3615   Mat            M,Msub,B=a->B;
3616   MatScalar      *aa;
3617   Mat_SeqAIJ     *aij;
3618   PetscInt       *garray = a->garray,*colsub,Ncols;
3619   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3620   IS             iscol_sub,iscmap;
3621   const PetscInt *is_idx,*cmap;
3622   PetscBool      allcolumns=PETSC_FALSE;
3623   MPI_Comm       comm;
3624 
3625   PetscFunctionBegin;
3626   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3627 
3628   if (call == MAT_REUSE_MATRIX) {
3629     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3630     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3631     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3632 
3633     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3634     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3635 
3636     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3637     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3638 
3639     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3640 
3641   } else { /* call == MAT_INITIAL_MATRIX */
3642     PetscBool flg;
3643 
3644     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3645     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3646 
3647     /* (1) iscol -> nonscalable iscol_local */
3648     /* Check for special case: each processor gets entire matrix columns */
3649     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3650     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3651     if (allcolumns) {
3652       iscol_sub = iscol_local;
3653       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3654       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3655 
3656     } else {
3657       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3658       PetscInt *idx,*cmap1,k;
3659       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3660       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3661       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3662       count = 0;
3663       k     = 0;
3664       for (i=0; i<Ncols; i++) {
3665         j = is_idx[i];
3666         if (j >= cstart && j < cend) {
3667           /* diagonal part of mat */
3668           idx[count]     = j;
3669           cmap1[count++] = i; /* column index in submat */
3670         } else if (Bn) {
3671           /* off-diagonal part of mat */
3672           if (j == garray[k]) {
3673             idx[count]     = j;
3674             cmap1[count++] = i;  /* column index in submat */
3675           } else if (j > garray[k]) {
3676             while (j > garray[k] && k < Bn-1) k++;
3677             if (j == garray[k]) {
3678               idx[count]     = j;
3679               cmap1[count++] = i; /* column index in submat */
3680             }
3681           }
3682         }
3683       }
3684       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3685 
3686       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3687       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3688       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3689 
3690       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3691     }
3692 
3693     /* (3) Create sequential Msub */
3694     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3695   }
3696 
3697   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3698   aij  = (Mat_SeqAIJ*)(Msub)->data;
3699   ii   = aij->i;
3700   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3701 
3702   /*
3703       m - number of local rows
3704       Ncols - number of columns (same on all processors)
3705       rstart - first row in new global matrix generated
3706   */
3707   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3708 
3709   if (call == MAT_INITIAL_MATRIX) {
3710     /* (4) Create parallel newmat */
3711     PetscMPIInt    rank,size;
3712     PetscInt       csize;
3713 
3714     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3715     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3716 
3717     /*
3718         Determine the number of non-zeros in the diagonal and off-diagonal
3719         portions of the matrix in order to do correct preallocation
3720     */
3721 
3722     /* first get start and end of "diagonal" columns */
3723     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3724     if (csize == PETSC_DECIDE) {
3725       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3726       if (mglobal == Ncols) { /* square matrix */
3727         nlocal = m;
3728       } else {
3729         nlocal = Ncols/size + ((Ncols % size) > rank);
3730       }
3731     } else {
3732       nlocal = csize;
3733     }
3734     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3735     rstart = rend - nlocal;
3736     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3737 
3738     /* next, compute all the lengths */
3739     jj    = aij->j;
3740     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3741     olens = dlens + m;
3742     for (i=0; i<m; i++) {
3743       jend = ii[i+1] - ii[i];
3744       olen = 0;
3745       dlen = 0;
3746       for (j=0; j<jend; j++) {
3747         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3748         else dlen++;
3749         jj++;
3750       }
3751       olens[i] = olen;
3752       dlens[i] = dlen;
3753     }
3754 
3755     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3756     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3757 
3758     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3759     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3760     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3761     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3762     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3763     ierr = PetscFree(dlens);CHKERRQ(ierr);
3764 
3765   } else { /* call == MAT_REUSE_MATRIX */
3766     M    = *newmat;
3767     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3768     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3769     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3770     /*
3771          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3772        rather than the slower MatSetValues().
3773     */
3774     M->was_assembled = PETSC_TRUE;
3775     M->assembled     = PETSC_FALSE;
3776   }
3777 
3778   /* (5) Set values of Msub to *newmat */
3779   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3780   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3781 
3782   jj   = aij->j;
3783   aa   = aij->a;
3784   for (i=0; i<m; i++) {
3785     row = rstart + i;
3786     nz  = ii[i+1] - ii[i];
3787     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3788     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3789     jj += nz; aa += nz;
3790   }
3791   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3792 
3793   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3794   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3795 
3796   ierr = PetscFree(colsub);CHKERRQ(ierr);
3797 
3798   /* save Msub, iscol_sub and iscmap used in processor for next request */
3799   if (call ==  MAT_INITIAL_MATRIX) {
3800     *newmat = M;
3801     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3802     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3803 
3804     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3805     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3806 
3807     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3808     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3809 
3810     if (iscol_local) {
3811       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3812       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3813     }
3814   }
3815   PetscFunctionReturn(0);
3816 }
3817 
3818 /*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  locally, and then the end result by concatenating the local matrices.
3821   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3822 
3823   Note: This requires a sequential iscol with all indices.
3824 */
3825 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3826 {
3827   PetscErrorCode ierr;
3828   PetscMPIInt    rank,size;
3829   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3830   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3831   Mat            M,Mreuse;
3832   MatScalar      *aa,*vwork;
3833   MPI_Comm       comm;
3834   Mat_SeqAIJ     *aij;
3835   PetscBool      colflag,allcolumns=PETSC_FALSE;
3836 
3837   PetscFunctionBegin;
3838   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3839   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3840   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3841 
3842   /* Check for special case: each processor gets entire matrix columns */
3843   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3844   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3845   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3846 
3847   if (call ==  MAT_REUSE_MATRIX) {
3848     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3849     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3850     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3851   } else {
3852     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3853   }
3854 
3855   /*
3856       m - number of local rows
3857       n - number of columns (same on all processors)
3858       rstart - first row in new global matrix generated
3859   */
3860   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3861   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3862   if (call == MAT_INITIAL_MATRIX) {
3863     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3864     ii  = aij->i;
3865     jj  = aij->j;
3866 
3867     /*
3868         Determine the number of non-zeros in the diagonal and off-diagonal
3869         portions of the matrix in order to do correct preallocation
3870     */
3871 
3872     /* first get start and end of "diagonal" columns */
3873     if (csize == PETSC_DECIDE) {
3874       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3875       if (mglobal == n) { /* square matrix */
3876         nlocal = m;
3877       } else {
3878         nlocal = n/size + ((n % size) > rank);
3879       }
3880     } else {
3881       nlocal = csize;
3882     }
3883     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3884     rstart = rend - nlocal;
3885     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3886 
3887     /* next, compute all the lengths */
3888     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3889     olens = dlens + m;
3890     for (i=0; i<m; i++) {
3891       jend = ii[i+1] - ii[i];
3892       olen = 0;
3893       dlen = 0;
3894       for (j=0; j<jend; j++) {
3895         if (*jj < rstart || *jj >= rend) olen++;
3896         else dlen++;
3897         jj++;
3898       }
3899       olens[i] = olen;
3900       dlens[i] = dlen;
3901     }
3902     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3903     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3904     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3905     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3906     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3907     ierr = PetscFree(dlens);CHKERRQ(ierr);
3908   } else {
3909     PetscInt ml,nl;
3910 
3911     M    = *newmat;
3912     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3913     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3914     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3915     /*
3916          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3917        rather than the slower MatSetValues().
3918     */
3919     M->was_assembled = PETSC_TRUE;
3920     M->assembled     = PETSC_FALSE;
3921   }
3922   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3923   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3924   ii   = aij->i;
3925   jj   = aij->j;
3926   aa   = aij->a;
3927   for (i=0; i<m; i++) {
3928     row   = rstart + i;
3929     nz    = ii[i+1] - ii[i];
3930     cwork = jj;     jj += nz;
3931     vwork = aa;     aa += nz;
3932     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3933   }
3934 
3935   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3936   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3937   *newmat = M;
3938 
3939   /* save submatrix used in processor for next request */
3940   if (call ==  MAT_INITIAL_MATRIX) {
3941     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3942     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3943   }
3944   PetscFunctionReturn(0);
3945 }
3946 
3947 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3948 {
3949   PetscInt       m,cstart, cend,j,nnz,i,d;
3950   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3951   const PetscInt *JJ;
3952   PetscErrorCode ierr;
3953   PetscBool      nooffprocentries;
3954 
3955   PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3957 
3958   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3959   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3960   m      = B->rmap->n;
3961   cstart = B->cmap->rstart;
3962   cend   = B->cmap->rend;
3963   rstart = B->rmap->rstart;
3964 
3965   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3966 
3967 #if defined(PETSC_USE_DEBUG)
3968   for (i=0; i<m; i++) {
3969     nnz = Ii[i+1]- Ii[i];
3970     JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3973     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3974   }
3975 #endif
3976 
3977   for (i=0; i<m; i++) {
3978     nnz     = Ii[i+1]- Ii[i];
3979     JJ      = J + Ii[i];
3980     nnz_max = PetscMax(nnz_max,nnz);
3981     d       = 0;
3982     for (j=0; j<nnz; j++) {
3983       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3984     }
3985     d_nnz[i] = d;
3986     o_nnz[i] = nnz - d;
3987   }
3988   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3989   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3990 
3991   for (i=0; i<m; i++) {
3992     ii   = i + rstart;
3993     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3994   }
3995   nooffprocentries    = B->nooffprocentries;
3996   B->nooffprocentries = PETSC_TRUE;
3997   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3998   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3999   B->nooffprocentries = nooffprocentries;
4000 
4001   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4002   PetscFunctionReturn(0);
4003 }
4004 
4005 /*@
4006    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4007    (the default parallel PETSc format).
4008 
4009    Collective
4010 
4011    Input Parameters:
4012 +  B - the matrix
4013 .  i - the indices into j for the start of each local row (starts with zero)
4014 .  j - the column indices for each local row (starts with zero)
4015 -  v - optional values in the matrix
4016 
4017    Level: developer
4018 
4019    Notes:
4020        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4021      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4022      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4023 
4024        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4025 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:
4029 
4030 $        1 0 0
4031 $        2 0 3     P0
4032 $       -------
4033 $        4 5 6     P1
4034 $
4035 $     Process0 [P0]: rows_owned=[0,1]
4036 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4037 $        j =  {0,0,2}  [size = 3]
4038 $        v =  {1,2,3}  [size = 3]
4039 $
4040 $     Process1 [P1]: rows_owned=[2]
4041 $        i =  {0,3}    [size = nrow+1  = 1+1]
4042 $        j =  {0,1,2}  [size = 3]
4043 $        v =  {4,5,6}  [size = 3]
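
   A minimal calling sequence for the example above might look as follows (a sketch,
   error checking omitted; mlocal, i, j, and v denote each process's own values,
   e.g. mlocal is 2 on P0 and 1 on P1):

.vb
     Mat B;
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,mlocal,PETSC_DECIDE,PETSC_DETERMINE,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve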
4044 
4045 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4046           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4047 @*/
4048 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4049 {
4050   PetscErrorCode ierr;
4051 
4052   PetscFunctionBegin;
4053   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4054   PetscFunctionReturn(0);
4055 }
4056 
4057 /*@C
4058    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4059    (the default parallel PETSc format).  For good matrix assembly performance
4060    the user should preallocate the matrix storage by setting the parameters
4061    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4062    performance can be increased by more than a factor of 50.
4063 
4064    Collective
4065 
4066    Input Parameters:
4067 +  B - the matrix
4068 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4069            (same value is used for all local rows)
4070 .  d_nnz - array containing the number of nonzeros in the various rows of the
4071            DIAGONAL portion of the local submatrix (possibly different for each row)
4072            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4073            The size of this array is equal to the number of local rows, i.e 'm'.
4074            For matrices that will be factored, you must leave room for (and set)
4075            the diagonal entry even if it is zero.
4076 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4077            submatrix (same value is used for all local rows).
4078 -  o_nnz - array containing the number of nonzeros in the various rows of the
4079            OFF-DIAGONAL portion of the local submatrix (possibly different for
4080            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4081            structure. The size of this array is equal to the number
4082            of local rows, i.e 'm'.
4083 
4084    If the *_nnz parameter is given then the *_nz parameter is ignored
4085 
4086    The AIJ format (also called the Yale sparse matrix format or
4087    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4088    storage.  The stored row and column indices begin with zero.
4089    See Users-Manual: ch_mat for details.
4090 
4091    The parallel matrix is partitioned such that the first m0 rows belong to
4092    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4094 
4095    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
4100    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4101    common case of a square matrix, the row and column ranges are the same and
4102    the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4104 
4105    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4106 
4107    You can call MatGetInfo() to get information on how effective the preallocation was;
4108    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4109    You can also run with the option -info and look for messages with the string
4110    malloc in them to see if additional memory allocation was needed.
4111 
4112    Example usage:
4113 
4114    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4116    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4117    as follows:
4118 
4119 .vb
4120             1  2  0  |  0  3  0  |  0  4
4121     Proc0   0  5  6  |  7  0  0  |  8  0
4122             9  0 10  | 11  0  0  | 12  0
4123     -------------------------------------
4124            13  0 14  | 15 16 17  |  0  0
4125     Proc1   0 18  0  | 19 20 21  |  0  0
4126             0  0  0  | 22 23  0  | 24  0
4127     -------------------------------------
4128     Proc2  25 26 27  |  0  0 28  | 29  0
4129            30  0  0  | 31 32 33  |  0 34
4130 .ve
4131 
4132    This can be represented as a collection of submatrices as:
4133 
4134 .vb
4135       A B C
4136       D E F
4137       G H I
4138 .ve
4139 
4140    Where the submatrices A,B,C are owned by proc0, D,E,F are
4141    owned by proc1, G,H,I are owned by proc2.
4142 
4143    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4144    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4145    The 'M','N' parameters are 8,8, and have the same values on all procs.
4146 
4147    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4148    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4149    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4150    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4153 
4154    When d_nz, o_nz parameters are specified, d_nz storage elements are
4155    allocated for every row of the local diagonal submatrix, and o_nz
4156    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4159    In this case, the values of d_nz,o_nz are:
4160 .vb
4161      proc0 : dnz = 2, o_nz = 2
4162      proc1 : dnz = 3, o_nz = 2
4163      proc2 : dnz = 1, o_nz = 4
4164 .ve
4165    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4166    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4168    34 values.
4169 
4170    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4172    In the above case the values for d_nnz,o_nnz are:
4173 .vb
4174      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4175      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4176      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4177 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
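
   With the per-row counts above, a sketch of the calling sequence on each process is
   (error checking omitted; d_nnz and o_nnz are that process's own arrays, e.g. on
   proc1 d_nnz = {3,3,2} and o_nnz = {2,1,1}):

.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,8,8);            /* m = n = 3 on proc0 and proc1, 2 on proc2 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve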
4180 
4181    Level: intermediate
4182 
4183 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4184           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4185 @*/
4186 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4187 {
4188   PetscErrorCode ierr;
4189 
4190   PetscFunctionBegin;
4191   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4192   PetscValidType(B,1);
4193   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4194   PetscFunctionReturn(0);
4195 }
4196 
4197 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
         rows in standard CSR format.
4200 
4201    Collective
4202 
4203    Input Parameters:
4204 +  comm - MPI communicator
4205 .  m - number of local rows (Cannot be PETSC_DECIDE)
4206 .  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4211 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4212 .   j - column indices
4213 -   a - matrix values
4214 
4215    Output Parameter:
4216 .   mat - the matrix
4217 
4218    Level: intermediate
4219 
4220    Notes:
4221        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4222      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4223      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4224 
4225        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4226 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown below.

       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4232 
4233 $        1 0 0
4234 $        2 0 3     P0
4235 $       -------
4236 $        4 5 6     P1
4237 $
4238 $     Process0 [P0]: rows_owned=[0,1]
4239 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4240 $        j =  {0,0,2}  [size = 3]
4241 $        v =  {1,2,3}  [size = 3]
4242 $
4243 $     Process1 [P1]: rows_owned=[2]
4244 $        i =  {0,3}    [size = nrow+1  = 1+1]
4245 $        j =  {0,1,2}  [size = 3]
4246 $        v =  {4,5,6}  [size = 3]
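
   A sketch of the corresponding call for the example above (error checking omitted;
   mlocal is 2 on P0 and 1 on P1, and i, j, v are each process's own arrays):

.vb
     Mat mat;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,mlocal,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&mat);
     /* ... use mat ... */
     MatDestroy(&mat);
.ve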
4247 
4248 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4249           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4250 @*/
4251 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4252 {
4253   PetscErrorCode ierr;
4254 
4255   PetscFunctionBegin;
4256   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4257   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4258   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4259   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4260   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4261   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4262   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4263   PetscFunctionReturn(0);
4264 }
4265 
4266 /*@
     MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
         rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created.
4269 
4270    Collective
4271 
4272    Input Parameters:
4273 +  mat - the matrix
4274 .  m - number of local rows (Cannot be PETSC_DECIDE)
4275 .  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4280 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4281 .  J - column indices
4282 -  v - matrix values
4283 
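   Notes:
   A sketch of a typical update cycle (a hypothetical example, error checking omitted;
   mat is created with MatCreateMPIAIJWithArrays() from arrays i, j, v, and vnew is an
   array holding new numerical values in the same CSR layout):

.vb
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&mat);
     /* ... later, update only the values ... */
     MatUpdateMPIAIJWithArrays(mat,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,i,j,vnew);
.ve

   Note that mlocal and nlocal must match the local sizes the matrix was created with;
   this routine errors out otherwise.
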
4284    Level: intermediate
4285 
4286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4287           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4288 @*/
4289 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4290 {
4291   PetscErrorCode ierr;
4292   PetscInt       cstart,nnz,i,j;
4293   PetscInt       *ld;
4294   PetscBool      nooffprocentries;
4295   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4296   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4297   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4298   const PetscInt *Adi = Ad->i;
4299   PetscInt       ldi,Iii,md;
4300 
4301   PetscFunctionBegin;
4302   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4303   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4304   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4305   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4306 
4307   cstart = mat->cmap->rstart;
4308   if (!Aij->ld) {
4309     /* count number of entries below block diagonal */
4310     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4311     Aij->ld = ld;
4312     for (i=0; i<m; i++) {
      nnz   = Ii[i+1] - Ii[i];
      j     = 0;
      while (j < nnz && J[j] < cstart) j++; /* test j < nnz first so J[j] is never read past the end of the row */
4316       J    += nnz;
4317       ld[i] = j;
4318     }
4319   } else {
4320     ld = Aij->ld;
4321   }
4322 
4323   for (i=0; i<m; i++) {
4324     nnz  = Ii[i+1]- Ii[i];
4325     Iii  = Ii[i];
4326     ldi  = ld[i];
4327     md   = Adi[i+1]-Adi[i];
4328     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4329     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4330     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4331     ad  += md;
4332     ao  += nnz - md;
4333   }
4334   nooffprocentries      = mat->nooffprocentries;
4335   mat->nooffprocentries = PETSC_TRUE;
4336   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4337   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4338   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4339   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4340   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4341   mat->nooffprocentries = nooffprocentries;
4342   PetscFunctionReturn(0);
4343 }
4344 
4345 /*@C
4346    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4347    (the default parallel PETSc format).  For good matrix assembly performance
4348    the user should preallocate the matrix storage by setting the parameters
4349    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4350    performance can be increased by more than a factor of 50.
4351 
4352    Collective
4353 
4354    Input Parameters:
4355 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4364 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4365            (same value is used for all local rows)
4366 .  d_nnz - array containing the number of nonzeros in the various rows of the
4367            DIAGONAL portion of the local submatrix (possibly different for each row)
4368            or NULL, if d_nz is used to specify the nonzero structure.
4369            The size of this array is equal to the number of local rows, i.e 'm'.
4370 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4371            submatrix (same value is used for all local rows).
4372 -  o_nnz - array containing the number of nonzeros in the various rows of the
4373            OFF-DIAGONAL portion of the local submatrix (possibly different for
4374            each row) or NULL, if o_nz is used to specify the nonzero
4375            structure. The size of this array is equal to the number
4376            of local rows, i.e 'm'.
4377 
4378    Output Parameter:
4379 .  A - the matrix
4380 
4381    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4382    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4383    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4384 
4385    Notes:
4386    If the *_nnz parameter is given then the *_nz parameter is ignored
4387 
4388    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4389    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4390    storage requirements for this matrix.
4391 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4394    that argument.
4395 
4396    The user MUST specify either the local or global matrix dimensions
4397    (possibly both).
4398 
4399    The parallel matrix is partitioned across processors such that the
4400    first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.
4404 
4405    The columns are logically partitioned with the n0 columns belonging
4406    to 0th partition, the next n1 columns belonging to the next
4407    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4408 
4409    The DIAGONAL portion of the local submatrix on any given processor
4410    is the submatrix corresponding to the rows and columns m,n
   owned by the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4413    etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
4415    illustrates this concept.
4416 
4417    For a square global matrix we define each processor's diagonal portion
4418    to be its local rows and the corresponding columns (a square submatrix);
4419    each processor's off-diagonal portion encompasses the remainder of the
4420    local matrix (a rectangular submatrix).
4421 
4422    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4423 
4424    When calling this routine with a single process communicator, a matrix of
4425    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4426    type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve
4435 
4436    By default, this format uses inodes (identical nodes) when possible.
4437    We search for consecutive rows with the same nonzero structure, thereby
4438    reusing matrix information to achieve increased efficiency.
4439 
4440    Options Database Keys:
4441 +  -mat_no_inode  - Do not use inodes
4442 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4445 
4446    Example usage:
4447 
4448    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:
4452 
4453 .vb
4454             1  2  0  |  0  3  0  |  0  4
4455     Proc0   0  5  6  |  7  0  0  |  8  0
4456             9  0 10  | 11  0  0  | 12  0
4457     -------------------------------------
4458            13  0 14  | 15 16 17  |  0  0
4459     Proc1   0 18  0  | 19 20 21  |  0  0
4460             0  0  0  | 22 23  0  | 24  0
4461     -------------------------------------
4462     Proc2  25 26 27  |  0  0 28  | 29  0
4463            30  0  0  | 31 32 33  |  0 34
4464 .ve
4465 
   This can be represented as a collection of submatrices as:
4467 
4468 .vb
4469       A B C
4470       D E F
4471       G H I
4472 .ve
4473 
4474    Where the submatrices A,B,C are owned by proc0, D,E,F are
4475    owned by proc1, G,H,I are owned by proc2.
4476 
4477    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4478    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4479    The 'M','N' parameters are 8,8, and have the same values on all procs.
4480 
4481    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4482    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4483    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4484    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4487 
4488    When d_nz, o_nz parameters are specified, d_nz storage elements are
4489    allocated for every row of the local diagonal submatrix, and o_nz
4490    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4493    In this case, the values of d_nz,o_nz are
4494 .vb
4495      proc0 : dnz = 2, o_nz = 2
4496      proc1 : dnz = 3, o_nz = 2
4497      proc2 : dnz = 1, o_nz = 4
4498 .ve
4499    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4500    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4502    34 values.
4503 
4504    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4506    In the above case the values for d_nnz,o_nnz are
4507 .vb
4508      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4509      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4510      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4511 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
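
   Using the example's values, a direct call might look like the following sketch
   (error checking omitted; each process passes its own m, n, d_nnz, o_nnz, with
   m,n = 3,3 on proc0 and proc1 and 2,2 on proc2):

.vb
     Mat A;
     MatCreateAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve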
4514 
4515    Level: intermediate
4516 
4517 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4518           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4519 @*/
4520 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4521 {
4522   PetscErrorCode ierr;
4523   PetscMPIInt    size;
4524 
4525   PetscFunctionBegin;
4526   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4527   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4529   if (size > 1) {
4530     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4531     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4532   } else {
4533     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4534     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4535   }
4536   PetscFunctionReturn(0);
4537 }
4538 
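/*
   A minimal usage sketch for MatMPIAIJGetSeqAIJ() below (assuming A is a MATMPIAIJ
   matrix): Ad receives the "diagonal" block, Ao the "off-diagonal" block, and colmap
   the map from local columns of Ao to global column indices. The returned objects are
   owned by A and must not be destroyed by the caller.

     Mat            Ad,Ao;
     const PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/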
4539 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4540 {
4541   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4542   PetscBool      flg;
4543   PetscErrorCode ierr;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4547   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4548   if (Ad)     *Ad     = a->A;
4549   if (Ao)     *Ao     = a->B;
4550   if (colmap) *colmap = a->garray;
4551   PetscFunctionReturn(0);
4552 }
4553 
4554 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4555 {
4556   PetscErrorCode ierr;
4557   PetscInt       m,N,i,rstart,nnz,Ii;
4558   PetscInt       *indx;
4559   PetscScalar    *values;
4560 
4561   PetscFunctionBegin;
4562   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4563   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4564     PetscInt       *dnz,*onz,sum,bs,cbs;
4565 
4566     if (n == PETSC_DECIDE) {
4567       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4568     }
4569     /* Check sum(n) = N */
4570     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4571     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4572 
4573     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4574     rstart -= m;
4575 
4576     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4577     for (i=0; i<m; i++) {
4578       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4579       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4580       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4581     }
4582 
4583     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4584     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4585     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4586     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4587     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4588     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4589     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4590     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4591   }
4592 
4593   /* numeric phase */
4594   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4595   for (i=0; i<m; i++) {
4596     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4597     Ii   = i + rstart;
4598     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4599     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4600   }
4601   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4602   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4603   PetscFunctionReturn(0);
4604 }
4605 
4606 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4607 {
4608   PetscErrorCode    ierr;
4609   PetscMPIInt       rank;
4610   PetscInt          m,N,i,rstart,nnz;
4611   size_t            len;
4612   const PetscInt    *indx;
4613   PetscViewer       out;
4614   char              *name;
4615   Mat               B;
4616   const PetscScalar *values;
4617 
4618   PetscFunctionBegin;
4619   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4620   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4621   /* Should this be the type of the diagonal block of A? */
4622   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4623   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4624   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4625   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4626   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4627   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4628   for (i=0; i<m; i++) {
4629     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4630     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4631     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4632   }
4633   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4634   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4635 
4636   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4637   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4638   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4639   sprintf(name,"%s.%d",outfile,rank);
4640   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4641   ierr = PetscFree(name);CHKERRQ(ierr);
4642   ierr = MatView(B,out);CHKERRQ(ierr);
4643   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4644   ierr = MatDestroy(&B);CHKERRQ(ierr);
4645   PetscFunctionReturn(0);
4646 }
4647 
4648 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4649 {
4650   PetscErrorCode      ierr;
4651   Mat_Merge_SeqsToMPI *merge;
4652   PetscContainer      container;
4653 
4654   PetscFunctionBegin;
4655   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4656   if (container) {
4657     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4658     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4659     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4660     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4661     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4662     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4663     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4664     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4665     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4666     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4667     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4668     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4669     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4670     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4671     ierr = PetscFree(merge);CHKERRQ(ierr);
4672     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4673   }
4674   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4675   PetscFunctionReturn(0);
4676 }
4677 
4678 #include <../src/mat/utils/freespace.h>
4679 #include <petscbt.h>
4680 
4681 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4682 {
4683   PetscErrorCode      ierr;
4684   MPI_Comm            comm;
4685   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4686   PetscMPIInt         size,rank,taga,*len_s;
4687   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4688   PetscInt            proc,m;
4689   PetscInt            **buf_ri,**buf_rj;
4690   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4691   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4692   MPI_Request         *s_waits,*r_waits;
4693   MPI_Status          *status;
4694   MatScalar           *aa=a->a;
4695   MatScalar           **abuf_r,*ba_i;
4696   Mat_Merge_SeqsToMPI *merge;
4697   PetscContainer      container;
4698 
4699   PetscFunctionBegin;
4700   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4701   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4702 
4703   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4704   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4705 
4706   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4707   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4708 
4709   bi     = merge->bi;
4710   bj     = merge->bj;
4711   buf_ri = merge->buf_ri;
4712   buf_rj = merge->buf_rj;
4713 
4714   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4715   owners = merge->rowmap->range;
4716   len_s  = merge->len_s;
4717 
4718   /* send and recv matrix values */
4719   /*-----------------------------*/
4720   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4721   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4722 
4723   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4724   for (proc=0,k=0; proc<size; proc++) {
4725     if (!len_s[proc]) continue;
4726     i    = owners[proc];
4727     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4728     k++;
4729   }
4730 
4731   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4732   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4733   ierr = PetscFree(status);CHKERRQ(ierr);
4734 
4735   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4736   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4737 
4738   /* insert mat values of mpimat */
4739   /*----------------------------*/
4740   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4741   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4742 
4743   for (k=0; k<merge->nrecv; k++) {
4744     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4745     nrows       = *(buf_ri_k[k]);
4746     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure */
4748   }
4749 
4750   /* set values of ba */
4751   m = merge->rowmap->n;
4752   for (i=0; i<m; i++) {
4753     arow = owners[rank] + i;
4754     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4755     bnzi = bi[i+1] - bi[i];
4756     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4757 
4758     /* add local non-zero vals of this proc's seqmat into ba */
4759     anzi   = ai[arow+1] - ai[arow];
4760     aj     = a->j + ai[arow];
4761     aa     = a->a + ai[arow];
4762     nextaj = 0;
4763     for (j=0; nextaj<anzi; j++) {
4764       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4765         ba_i[j] += aa[nextaj++];
4766       }
4767     }
4768 
4769     /* add received vals into ba */
4770     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4771       /* i-th row */
4772       if (i == *nextrow[k]) {
4773         anzi   = *(nextai[k]+1) - *nextai[k];
4774         aj     = buf_rj[k] + *(nextai[k]);
4775         aa     = abuf_r[k] + *(nextai[k]);
4776         nextaj = 0;
4777         for (j=0; nextaj<anzi; j++) {
4778           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4779             ba_i[j] += aa[nextaj++];
4780           }
4781         }
4782         nextrow[k]++; nextai[k]++;
4783       }
4784     }
4785     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4786   }
4787   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4788   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4789 
4790   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4791   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4792   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4793   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4794   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4795   PetscFunctionReturn(0);
4796 }
4797 
4798 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4799 {
4800   PetscErrorCode      ierr;
4801   Mat                 B_mpi;
4802   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4803   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4804   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4805   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4806   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4807   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4808   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4809   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4810   MPI_Status          *status;
4811   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4812   PetscBT             lnkbt;
4813   Mat_Merge_SeqsToMPI *merge;
4814   PetscContainer      container;
4815 
4816   PetscFunctionBegin;
4817   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4818 
4819   /* make sure it is a PETSc comm */
4820   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4821   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4822   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4823 
4824   ierr = PetscNew(&merge);CHKERRQ(ierr);
4825   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4826 
4827   /* determine row ownership */
4828   /*---------------------------------------------------------*/
4829   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4830   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4831   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4832   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4833   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4834   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4835   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4836 
4837   m      = merge->rowmap->n;
4838   owners = merge->rowmap->range;
4839 
4840   /* determine the number of messages to send, their lengths */
4841   /*---------------------------------------------------------*/
4842   len_s = merge->len_s;
4843 
4844   len          = 0; /* length of buf_si[] */
4845   merge->nsend = 0;
4846   for (proc=0; proc<size; proc++) {
4847     len_si[proc] = 0;
4848     if (proc == rank) {
4849       len_s[proc] = 0;
4850     } else {
4851       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4852       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4853     }
4854     if (len_s[proc]) {
4855       merge->nsend++;
4856       nrows = 0;
4857       for (i=owners[proc]; i<owners[proc+1]; i++) {
4858         if (ai[i+1] > ai[i]) nrows++;
4859       }
4860       len_si[proc] = 2*(nrows+1);
4861       len         += len_si[proc];
4862     }
4863   }
4864 
4865   /* determine the number and length of messages to receive for ij-structure */
4866   /*-------------------------------------------------------------------------*/
4867   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4868   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4869 
4870   /* post the Irecv of j-structure */
4871   /*-------------------------------*/
4872   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4873   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4874 
4875   /* post the Isend of j-structure */
4876   /*--------------------------------*/
4877   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4878 
4879   for (proc=0, k=0; proc<size; proc++) {
4880     if (!len_s[proc]) continue;
4881     i    = owners[proc];
4882     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4883     k++;
4884   }
4885 
4886   /* receives and sends of j-structure are complete */
4887   /*------------------------------------------------*/
4888   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4889   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4890 
4891   /* send and recv i-structure */
4892   /*---------------------------*/
4893   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4894   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4895 
4896   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4897   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4898   for (proc=0,k=0; proc<size; proc++) {
4899     if (!len_s[proc]) continue;
4900     /* form outgoing message for i-structure:
4901          buf_si[0]:                 nrows to be sent
4902                [1:nrows]:           row index (global)
4903                [nrows+1:2*nrows+1]: i-structure index
4904     */
4905     /*-------------------------------------------*/
4906     nrows       = len_si[proc]/2 - 1;
4907     buf_si_i    = buf_si + nrows+1;
4908     buf_si[0]   = nrows;
4909     buf_si_i[0] = 0;
4910     nrows       = 0;
4911     for (i=owners[proc]; i<owners[proc+1]; i++) {
4912       anzi = ai[i+1] - ai[i];
4913       if (anzi) {
4914         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4915         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4916         nrows++;
4917       }
4918     }
4919     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4920     k++;
4921     buf_si += len_si[proc];
4922   }
4923 
4924   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4925   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4926 
4927   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4928   for (i=0; i<merge->nrecv; i++) {
4929     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4930   }
4931 
4932   ierr = PetscFree(len_si);CHKERRQ(ierr);
4933   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4934   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4935   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4936   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4937   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4938   ierr = PetscFree(status);CHKERRQ(ierr);
4939 
4940   /* compute a local seq matrix in each processor */
4941   /*----------------------------------------------*/
4942   /* allocate bi array and free space for accumulating nonzero column info */
4943   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4944   bi[0] = 0;
4945 
4946   /* create and initialize a linked list */
4947   nlnk = N+1;
4948   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4949 
4950   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4951   len  = ai[owners[rank+1]] - ai[owners[rank]];
4952   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4953 
4954   current_space = free_space;
4955 
4956   /* determine symbolic info for each local row */
4957   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4958 
4959   for (k=0; k<merge->nrecv; k++) {
4960     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4961     nrows       = *buf_ri_k[k];
4962     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure */
4964   }
4965 
4966   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4967   len  = 0;
4968   for (i=0; i<m; i++) {
4969     bnzi = 0;
4970     /* add local non-zero cols of this proc's seqmat into lnk */
4971     arow  = owners[rank] + i;
4972     anzi  = ai[arow+1] - ai[arow];
4973     aj    = a->j + ai[arow];
4974     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4975     bnzi += nlnk;
4976     /* add received col data into lnk */
4977     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4978       if (i == *nextrow[k]) { /* i-th row */
4979         anzi  = *(nextai[k]+1) - *nextai[k];
4980         aj    = buf_rj[k] + *nextai[k];
4981         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4982         bnzi += nlnk;
4983         nextrow[k]++; nextai[k]++;
4984       }
4985     }
4986     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4987 
4988     /* if free space is not available, make more free space */
4989     if (current_space->local_remaining<bnzi) {
4990       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4991       nspacedouble++;
4992     }
4993     /* copy data into free space, then initialize lnk */
4994     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4995     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4996 
4997     current_space->array           += bnzi;
4998     current_space->local_used      += bnzi;
4999     current_space->local_remaining -= bnzi;
5000 
5001     bi[i+1] = bi[i] + bnzi;
5002   }
5003 
5004   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5005 
5006   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5007   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5008   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5009 
5010   /* create symbolic parallel matrix B_mpi */
5011   /*---------------------------------------*/
5012   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5013   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5014   if (n==PETSC_DECIDE) {
5015     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5016   } else {
5017     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5018   }
5019   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5020   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5021   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5022   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5023   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5024 
5025   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5026   B_mpi->assembled    = PETSC_FALSE;
5027   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5028   merge->bi           = bi;
5029   merge->bj           = bj;
5030   merge->buf_ri       = buf_ri;
5031   merge->buf_rj       = buf_rj;
5032   merge->coi          = NULL;
5033   merge->coj          = NULL;
5034   merge->owners_co    = NULL;
5035 
5036   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5037 
5038   /* attach the supporting struct to B_mpi for reuse */
5039   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5040   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5041   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5042   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5043   *mpimat = B_mpi;
5044 
5045   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5046   PetscFunctionReturn(0);
5047 }
5048 
5049 /*@C
5050       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5051                  matrices from each processor
5052 
5053     Collective
5054 
5055    Input Parameters:
5056 +    comm - the communicator the parallel matrix will live on
5057 .    seqmat - the input sequential matrix
5058 .    m - number of local rows (or PETSC_DECIDE)
5059 .    n - number of local columns (or PETSC_DECIDE)
5060 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5061 
5062    Output Parameter:
5063 .    mpimat - the parallel matrix generated
5064 
5065     Level: advanced
5066 
5067    Notes:
5068      The dimensions of the sequential matrix on each processor MUST be the same.
5069      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and will be
5070      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5071 @*/
5072 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5073 {
5074   PetscErrorCode ierr;
5075   PetscMPIInt    size;
5076 
5077   PetscFunctionBegin;
5078   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5079   if (size == 1) {
5080     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5081     if (scall == MAT_INITIAL_MATRIX) {
5082       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5083     } else {
5084       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5085     }
5086     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5087     PetscFunctionReturn(0);
5088   }
5089   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5090   if (scall == MAT_INITIAL_MATRIX) {
5091     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5092   }
5093   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5094   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5095   PetscFunctionReturn(0);
5096 }
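
/*
   A minimal usage sketch for the routine above (illustrative only, not taken from the
   PETSc test suite): every rank assembles a SeqAIJ contribution of identical global
   dimensions, the pieces are summed into one MPIAIJ matrix, and the symbolic phase is
   reused when only the numerical values change.

     Mat seqmat,mpimat;
     ... each rank builds and assembles its own seqmat ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... change the values of seqmat, keeping its nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);   (per the Notes above, seqmat is destroyed together with mpimat)
*/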
5097 
5098 /*@
5099      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5100           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5101           with MatGetSize().
5102 
5103     Not Collective
5104 
5105    Input Parameters:
5106 +    A - the matrix
5107 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5108 
5109    Output Parameter:
5110 .    A_loc - the local sequential matrix generated
5111 
5112     Level: developer
5113 
5114 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5115 
5116 @*/
5117 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5118 {
5119   PetscErrorCode ierr;
5120   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5121   Mat_SeqAIJ     *mat,*a,*b;
5122   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5123   MatScalar      *aa,*ba,*cam;
5124   PetscScalar    *ca;
5125   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5126   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5127   PetscBool      match;
5128   MPI_Comm       comm;
5129   PetscMPIInt    size;
5130 
5131   PetscFunctionBegin;
5132   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5133   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5134   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5135   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5136   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5137 
5138   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5139   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5140   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5141   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5142   aa = a->a; ba = b->a;
5143   if (scall == MAT_INITIAL_MATRIX) {
5144     if (size == 1) {
5145       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5146       PetscFunctionReturn(0);
5147     }
5148 
5149     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5150     ci[0] = 0;
5151     for (i=0; i<am; i++) {
5152       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5153     }
5154     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5155     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5156     k    = 0;
5157     for (i=0; i<am; i++) {
5158       ncols_o = bi[i+1] - bi[i];
5159       ncols_d = ai[i+1] - ai[i];
5160       /* off-diagonal portion of A */
5161       for (jo=0; jo<ncols_o; jo++) {
5162         col = cmap[*bj];
5163         if (col >= cstart) break;
5164         cj[k]   = col; bj++;
5165         ca[k++] = *ba++;
5166       }
5167       /* diagonal portion of A */
5168       for (j=0; j<ncols_d; j++) {
5169         cj[k]   = cstart + *aj++;
5170         ca[k++] = *aa++;
5171       }
5172       /* off-diagonal portion of A */
5173       for (j=jo; j<ncols_o; j++) {
5174         cj[k]   = cmap[*bj++];
5175         ca[k++] = *ba++;
5176       }
5177     }
5178     /* put together the new matrix */
5179     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5180     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5181     /* Since these are PETSc arrays, change flags to free them as necessary. */
5182     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5183     mat->free_a  = PETSC_TRUE;
5184     mat->free_ij = PETSC_TRUE;
5185     mat->nonew   = 0;
5186   } else if (scall == MAT_REUSE_MATRIX) {
5187     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5188     ci = mat->i; cj = mat->j; cam = mat->a;
5189     for (i=0; i<am; i++) {
5190       /* off-diagonal portion of A */
5191       ncols_o = bi[i+1] - bi[i];
5192       for (jo=0; jo<ncols_o; jo++) {
5193         col = cmap[*bj];
5194         if (col >= cstart) break;
5195         *cam++ = *ba++; bj++;
5196       }
5197       /* diagonal portion of A */
5198       ncols_d = ai[i+1] - ai[i];
5199       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5200       /* off-diagonal portion of A */
5201       for (j=jo; j<ncols_o; j++) {
5202         *cam++ = *ba++; bj++;
5203       }
5204     }
5205   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5206   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5207   PetscFunctionReturn(0);
5208 }
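
/*
   A usage sketch for the routine above (illustrative only): extract the local rows of
   a parallel matrix as one sequential matrix, refresh it in place after the parallel
   values change, and free it when done.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc, an mlocal-by-N SeqAIJ matrix ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);   (values are updated in place)
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/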
5209 
5210 /*@C
5211      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5212 
5213     Not Collective
5214 
5215    Input Parameters:
5216 +    A - the matrix
5217 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5218 -    row, col - index sets of rows and columns to extract (or NULL)
5219 
5220    Output Parameter:
5221 .    A_loc - the local sequential matrix generated
5222 
5223     Level: developer
5224 
5225 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5226 
5227 @*/
5228 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5229 {
5230   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5231   PetscErrorCode ierr;
5232   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5233   IS             isrowa,iscola;
5234   Mat            *aloc;
5235   PetscBool      match;
5236 
5237   PetscFunctionBegin;
5238   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5239   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5240   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5241   if (!row) {
5242     start = A->rmap->rstart; end = A->rmap->rend;
5243     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5244   } else {
5245     isrowa = *row;
5246   }
5247   if (!col) {
5248     start = A->cmap->rstart;
5249     cmap  = a->garray;
5250     nzA   = a->A->cmap->n;
5251     nzB   = a->B->cmap->n;
5252     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5253     ncols = 0;
5254     for (i=0; i<nzB; i++) {
5255       if (cmap[i] < start) idx[ncols++] = cmap[i];
5256       else break;
5257     }
5258     imark = i;
5259     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5260     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5261     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5262   } else {
5263     iscola = *col;
5264   }
5265   if (scall != MAT_INITIAL_MATRIX) {
5266     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5267     aloc[0] = *A_loc;
5268   }
5269   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5270   if (!col) { /* attach global id of condensed columns */
5271     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5272   }
5273   *A_loc = aloc[0];
5274   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5275   if (!row) {
5276     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5277   }
5278   if (!col) {
5279     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5280   }
5281   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5282   PetscFunctionReturn(0);
5283 }
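
/*
   A usage sketch for the routine above (illustrative only): with row = col = NULL the
   routine selects the nonzero columns itself and composes the IS of their global
   indices onto the result; the queried IS is owned by A_loc and must not be destroyed
   by the caller.

     Mat A_loc;
     IS  iscol;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = PetscObjectQuery((PetscObject)A_loc,"_petsc_GetLocalMatCondensed_iscol",(PetscObject*)&iscol);CHKERRQ(ierr);
     ... iscol maps the condensed (local) columns of A_loc back to global columns of A ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/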
5284 
5285 /*
5286  * Destroy a mat that may be composed with PetscSF communication objects.
5287  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5288  * */
5289 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5290 {
5291   PetscSF          sf,osf;
5292   IS               map;
5293   PetscErrorCode   ierr;
5294 
5295   PetscFunctionBegin;
5296   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5297   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5298   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5299   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5300   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5301   ierr = ISDestroy(&map);CHKERRQ(ierr);
5302   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5303   PetscFunctionReturn(0);
5304 }
5305 
5306 /*
5307  * Create a sequential AIJ matrix based on row indices: once a row is matched, all of its columns are extracted.
5308  * A row could be local or remote. The routine is designed to be memory scalable, so nothing depends
5309  * on a global size.
5310  * */
5311 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5312 {
5313   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5314   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5315   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5316   PetscMPIInt              owner;
5317   PetscSFNode              *iremote,*oiremote;
5318   const PetscInt           *lrowindices;
5319   PetscErrorCode           ierr;
5320   PetscSF                  sf,osf;
5321   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5322   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5323   MPI_Comm                 comm;
5324   ISLocalToGlobalMapping   mapping;
5325 
5326   PetscFunctionBegin;
5327   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5328   /* plocalsize is the number of roots
5329    * nrows is the number of leaves
5330    * */
5331   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5332   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5333   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5334   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5335   for (i=0;i<nrows;i++) {
5336     /* Find a remote index and an owner for a row
5337      * The row could be local or remote
5338      * */
5339     owner = 0;
5340     lidx  = 0;
5341     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5342     iremote[i].index = lidx;
5343     iremote[i].rank  = owner;
5344   }
5345   /* Create SF to communicate how many nonzero columns for each row */
5346   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5347   /* SF will figure out the number of nonzero columns for each row, and their
5348    * offsets
5349    * */
5350   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5351   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5352   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5353 
5354   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5355   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5356   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5357   roffsets[0] = 0;
5358   roffsets[1] = 0;
5359   for (i=0;i<plocalsize;i++) {
5360     /* diag */
5361     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5362     /* off diag */
5363     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5364     /* compute offsets so that we know the relative location of each row */
5365     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5366     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5367   }
5368   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5369   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5370   /* 'r' means root, and 'l' means leaf */
5371   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5372   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5373   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5374   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5375   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5376   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5377   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5378   dntotalcols = 0;
5379   ontotalcols = 0;
5380   ncol = 0;
5381   for (i=0;i<nrows;i++) {
5382     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5383     ncol = PetscMax(pnnz[i],ncol);
5384     /* diag */
5385     dntotalcols += nlcols[i*2+0];
5386     /* off diag */
5387     ontotalcols += nlcols[i*2+1];
5388   }
5389   /* We do not need to figure out the exact number of columns
5390    * since all the calculations will be done by going through the raw data
5391    * */
5392   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5393   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5394   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5395   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5396   /* diag */
5397   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5398   /* off diag */
5399   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5400   /* diag */
5401   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5402   /* off diag */
5403   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5404   dntotalcols = 0;
5405   ontotalcols = 0;
5406   ntotalcols  = 0;
5407   for (i=0;i<nrows;i++) {
5408     owner = 0;
5409     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5410     /* Set iremote for diag matrix */
5411     for (j=0;j<nlcols[i*2+0];j++) {
5412       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5413       iremote[dntotalcols].rank    = owner;
5414       /* P_oth is seqAIJ so ilocal needs to point to the first part of the memory */
5415       ilocal[dntotalcols++]        = ntotalcols++;
5416     }
5417     /* off diag */
5418     for (j=0;j<nlcols[i*2+1];j++) {
5419       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5420       oiremote[ontotalcols].rank    = owner;
5421       oilocal[ontotalcols++]        = ntotalcols++;
5422     }
5423   }
5424   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5425   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5426   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5427   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5428   /* P serves as the roots and P_oth as the leaves
5429    * Diag matrix
5430    * */
5431   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5432   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5433   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5434 
5435   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5436   /* Off diag */
5437   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5438   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5439   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5440   /* We operate on the matrix internal data for saving memory */
5441   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5442   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5443   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5444   /* Convert to global indices for diag matrix */
5445   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5446   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5447   /* We want P_oth to store global indices */
5448   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5449   /* Use memory scalable approach */
5450   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5451   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5452   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5453   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5454   /* Convert back to local indices */
5455   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5456   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5457   nout = 0;
5458   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5459   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D \n",po->i[plocalsize],nout);
5460   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5461   /* Exchange values */
5462   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5463   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5464   /* Stop PETSc from shrinking memory */
5465   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5466   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5467   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5468   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5469   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5470   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5471   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5472   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5473   PetscFunctionReturn(0);
5474 }
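
/*
   The PetscSF pattern used throughout the routine above, reduced to its essentials (a
   sketch with placeholder sizes NROOTS/NLEAVES, not library code): roots live on the
   owning ranks, each leaf names its root by (rank,index), and a broadcast moves root
   data to the leaves, with independent work overlapped between Begin and End.

     PetscSF     sf;
     PetscSFNode *iremote;
     ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
     ierr = PetscCalloc1(NLEAVES,&iremote);CHKERRQ(ierr);
     ... fill iremote[i].rank and iremote[i].index for every leaf i ...
     ierr = PetscSFSetGraph(sf,NROOTS,NLEAVES,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
     ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
     ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
     ... overlap unrelated work here, as the routine above overlaps two broadcasts ...
     ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/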
5475 
5476 /*
5477  * Creates a SeqAIJ matrix from the rows of P that correspond to the nonzero columns of the local A.
5478  * This supports MPIAIJ and MAIJ.
5479  * */
5480 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5481 {
5482   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5483   Mat_SeqAIJ            *p_oth;
5484   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5485   IS                    rows,map;
5486   PetscHMapI            hamp;
5487   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5488   MPI_Comm              comm;
5489   PetscSF               sf,osf;
5490   PetscBool             has;
5491   PetscErrorCode        ierr;
5492 
5493   PetscFunctionBegin;
5494   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5495   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5496   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5497    *  and then create a submatrix (that often is an overlapping matrix)
5498    * */
5499   if (reuse==MAT_INITIAL_MATRIX) {
5500     /* Use a hash table to figure out unique keys */
5501     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5502     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5503     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5504     count = 0;
5505     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5506     for (i=0;i<a->B->cmap->n;i++) {
5507       key  = a->garray[i]/dof;
5508       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5509       if (!has) {
5510         mapping[i] = count;
5511         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5512       } else {
5513         /* Current 'i' has the same value as the previous step */
5514         mapping[i] = count-1;
5515       }
5516     }
5517     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5518     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5519     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D \n",htsize,count);
5520     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5521     off = 0;
5522     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5523     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5524     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5525     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5526     /* In case the matrix was already created and the user wants to recreate it */
5527     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5528     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5529     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5530     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5531   } else if (reuse==MAT_REUSE_MATRIX) {
5532     /* If the matrix was already created, we simply update the values using the SF objects
5533      * that were attached to the matrix earlier.
5534      *  */
5535     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5536     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5537     if (!sf || !osf) {
5538       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5539     }
5540     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5541     /* Update values in place */
5542     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5543     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5544     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5545     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5546   } else {
5547     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5548   }
5549   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5550   PetscFunctionReturn(0);
5551 }
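
/*
   The hash-map compression idiom from the MAT_INITIAL_MATRIX branch above, in
   isolation (a sketch; keys[] and n are placeholders): duplicate keys collapse onto
   the id assigned at their first occurrence, which is valid only because the keys
   arrive sorted.

     PetscHMapI ht;
     PetscInt   i,id,count = 0;
     PetscBool  has;
     ierr = PetscHMapICreate(&ht);CHKERRQ(ierr);
     for (i=0; i<n; i++) {
       ierr = PetscHMapIHas(ht,keys[i],&has);CHKERRQ(ierr);
       if (!has) {id = count; ierr = PetscHMapISet(ht,keys[i],count++);CHKERRQ(ierr);}
       else id = count-1;
       ... record id as the compressed index for keys[i] ...
     }
     ierr = PetscHMapIDestroy(&ht);CHKERRQ(ierr);
*/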
5552 
5553 /*@C
5554     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5555 
5556     Collective on Mat
5557 
5558    Input Parameters:
5559 +    A,B - the matrices in mpiaij format
5560 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5561 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5562 
5563    Output Parameter:
5564 +    rowb, colb - index sets of rows and columns of B to extract
5565 -    B_seq - the sequential matrix generated
5566 
5567     Level: developer
5568 
5569 @*/
5570 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5571 {
5572   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5573   PetscErrorCode ierr;
5574   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5575   IS             isrowb,iscolb;
5576   Mat            *bseq=NULL;
5577 
5578   PetscFunctionBegin;
5579   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5580     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5581   }
5582   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5583 
5584   if (scall == MAT_INITIAL_MATRIX) {
5585     start = A->cmap->rstart;
5586     cmap  = a->garray;
5587     nzA   = a->A->cmap->n;
5588     nzB   = a->B->cmap->n;
5589     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5590     ncols = 0;
5591     for (i=0; i<nzB; i++) {  /* row < local row index */
5592       if (cmap[i] < start) idx[ncols++] = cmap[i];
5593       else break;
5594     }
5595     imark = i;
5596     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5597     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5598     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5599     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5600   } else {
5601     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5602     isrowb  = *rowb; iscolb = *colb;
5603     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5604     bseq[0] = *B_seq;
5605   }
5606   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5607   *B_seq = bseq[0];
5608   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5609   if (!rowb) {
5610     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5611   } else {
5612     *rowb = isrowb;
5613   }
5614   if (!colb) {
5615     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5616   } else {
5617     *colb = iscolb;
5618   }
5619   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5620   PetscFunctionReturn(0);
5621 }
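
/*
   A usage sketch for the routine above (illustrative only): on the first call pass
   the addresses of NULL index sets and keep what the routine hands back, since rowb
   and colb are mandatory inputs for the MAT_REUSE_MATRIX call.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... the values of B change, its nonzero pattern does not ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/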
5622 
5623 /*
5624     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5625     of the OFF-DIAGONAL portion of local A
5626 
5627     Collective on Mat
5628 
5629    Input Parameters:
5630 +    A,B - the matrices in mpiaij format
5631 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5632 
5633    Output Parameter:
5634 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5635 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5636 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5637 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5638 
5639     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5640      for this matrix. This is not desirable.
5641 
5642     Level: developer
5643 
5644 */
5645 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5646 {
5647   PetscErrorCode         ierr;
5648   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5649   Mat_SeqAIJ             *b_oth;
5650   VecScatter             ctx;
5651   MPI_Comm               comm;
5652   const PetscMPIInt      *rprocs,*sprocs;
5653   const PetscInt         *srow,*rstarts,*sstarts;
5654   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5655   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5656   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5657   MPI_Request            *rwaits = NULL,*swaits = NULL;
5658   MPI_Status             rstatus;
5659   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5660 
5661   PetscFunctionBegin;
5662   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5663   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5664 
5665   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5666     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5667   }
5668   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5669   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5670 
5671   if (size == 1) {
5672     if (startsj_s) *startsj_s = NULL;
5673     if (bufa_ptr)  *bufa_ptr  = NULL;
5674     *B_oth    = NULL;
5675     PetscFunctionReturn(0);
5676   }
5677 
5678   ctx = a->Mvctx;
5679   tag = ((PetscObject)ctx)->tag;
5680 
5681   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5682   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5683   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5684   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5685   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5686   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5687   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5688 
5689   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5690   if (scall == MAT_INITIAL_MATRIX) {
5691     /* i-array */
5692     /*---------*/
5693     /*  post receives */
5694     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5695     for (i=0; i<nrecvs; i++) {
5696       rowlen = rvalues + rstarts[i]*rbs;
5697       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5698       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5699     }
5700 
5701     /* pack the outgoing message */
5702     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5703 
5704     sstartsj[0] = 0;
5705     rstartsj[0] = 0;
5706     len         = 0; /* total length of j or a array to be sent */
5707     if (nsends) {
5708       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5709       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5710     }
5711     for (i=0; i<nsends; i++) {
5712       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5713       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5714       for (j=0; j<nrows; j++) {
5715         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5716         for (l=0; l<sbs; l++) {
5717           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5718 
5719           rowlen[j*sbs+l] = ncols;
5720 
5721           len += ncols;
5722           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5723         }
5724         k++;
5725       }
5726       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5727 
5728       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5729     }
5730     /* recvs and sends of i-array are completed */
5731     i = nrecvs;
5732     while (i--) {
5733       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5734     }
5735     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5736     ierr = PetscFree(svalues);CHKERRQ(ierr);
5737 
5738     /* allocate buffers for sending j and a arrays */
5739     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5740     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5741 
5742     /* create i-array of B_oth */
5743     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5744 
5745     b_othi[0] = 0;
5746     len       = 0; /* total length of j or a array to be received */
5747     k         = 0;
5748     for (i=0; i<nrecvs; i++) {
5749       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5750       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5751       for (j=0; j<nrows; j++) {
5752         b_othi[k+1] = b_othi[k] + rowlen[j];
5753         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5754         k++;
5755       }
5756       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5757     }
5758     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5759 
5760     /* allocate space for j and a arrays of B_oth */
5761     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5762     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5763 
5764     /* j-array */
5765     /*---------*/
5766     /*  post receives of j-array */
5767     for (i=0; i<nrecvs; i++) {
5768       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5769       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5770     }
5771 
5772     /* pack the outgoing message j-array */
5773     if (nsends) k = sstarts[0];
5774     for (i=0; i<nsends; i++) {
5775       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5776       bufJ  = bufj+sstartsj[i];
5777       for (j=0; j<nrows; j++) {
5778         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5779         for (ll=0; ll<sbs; ll++) {
5780           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5781           for (l=0; l<ncols; l++) {
5782             *bufJ++ = cols[l];
5783           }
5784           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5785         }
5786       }
5787       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5788     }
5789 
5790     /* recvs and sends of j-array are completed */
5791     i = nrecvs;
5792     while (i--) {
5793       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5794     }
5795     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5796   } else if (scall == MAT_REUSE_MATRIX) {
5797     sstartsj = *startsj_s;
5798     rstartsj = *startsj_r;
5799     bufa     = *bufa_ptr;
5800     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5801     b_otha   = b_oth->a;
5802   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5803 
5804   /* a-array */
5805   /*---------*/
5806   /*  post receives of a-array */
5807   for (i=0; i<nrecvs; i++) {
5808     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5809     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5810   }
5811 
5812   /* pack the outgoing message a-array */
5813   if (nsends) k = sstarts[0];
5814   for (i=0; i<nsends; i++) {
5815     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5816     bufA  = bufa+sstartsj[i];
5817     for (j=0; j<nrows; j++) {
5818       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5819       for (ll=0; ll<sbs; ll++) {
5820         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5821         for (l=0; l<ncols; l++) {
5822           *bufA++ = vals[l];
5823         }
5824         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5825       }
5826     }
5827     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5828   }
5829   /* recvs and sends of a-array are completed */
5830   i = nrecvs;
5831   while (i--) {
5832     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5833   }
5834   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5835   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5836 
5837   if (scall == MAT_INITIAL_MATRIX) {
5838     /* put together the new matrix */
5839     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5840 
5841     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5842     /* Since these are PETSc arrays, change flags to free them as necessary. */
5843     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5844     b_oth->free_a  = PETSC_TRUE;
5845     b_oth->free_ij = PETSC_TRUE;
5846     b_oth->nonew   = 0;
5847 
5848     ierr = PetscFree(bufj);CHKERRQ(ierr);
5849     if (!startsj_s || !bufa_ptr) {
5850       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5851       ierr = PetscFree(bufa);CHKERRQ(ierr);
5852     } else {
5853       *startsj_s = sstartsj;
5854       *startsj_r = rstartsj;
5855       *bufa_ptr  = bufa;
5856     }
5857   }
5858 
5859   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5860   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5861   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5862   PetscFunctionReturn(0);
5863 }
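
/*
   The message-passing skeleton MatGetBrowsOfAoCols_MPIAIJ() repeats for the i-, j-
   and a-arrays, shown once in isolation (a sketch; the buffer and displacement names
   are placeholders): post all receives first, pack and send, then wait on both sides
   before touching the data.

     for (i=0; i<nrecvs; i++) {
       ierr = MPI_Irecv(rbuf+rdispl[i],rlen[i],MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
     }
     for (i=0; i<nsends; i++) {
       ... pack sbuf+sdispl[i] ...
       ierr = MPI_Isend(sbuf+sdispl[i],slen[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
     }
     i = nrecvs;
     while (i--) {ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);}
     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
*/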
5864 
5865 /*@C
5866   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5867 
5868   Not Collective
5869 
5870   Input Parameters:
5871 . A - The matrix in mpiaij format
5872 
5873   Output Parameter:
5874 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5875 . colmap - A map from global column index to local index into lvec
5876 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5877 
5878   Level: developer
5879 
5880 @*/
5881 #if defined(PETSC_USE_CTABLE)
5882 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5883 #else
5884 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5885 #endif
5886 {
5887   Mat_MPIAIJ *a;
5888 
5889   PetscFunctionBegin;
5890   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5891   PetscValidPointer(lvec, 2);
5892   PetscValidPointer(colmap, 3);
5893   PetscValidPointer(multScatter, 4);
5894   a = (Mat_MPIAIJ*) A->data;
5895   if (lvec) *lvec = a->lvec;
5896   if (colmap) *colmap = a->colmap;
5897   if (multScatter) *multScatter = a->Mvctx;
5898   PetscFunctionReturn(0);
5899 }
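
/*
   A usage sketch for the routine above (illustrative only): the returned objects are
   internal to the matrix and remain owned by it, so the caller must not destroy them.

     Vec        lvec;
     VecScatter ctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&ctx);CHKERRQ(ierr);
     ... inspect lvec, colmap, and ctx; they are freed when A is destroyed ...
*/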
5900 
5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5904 #if defined(PETSC_HAVE_MKL_SPARSE)
5905 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5906 #endif
5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5908 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5909 #if defined(PETSC_HAVE_ELEMENTAL)
5910 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5911 #endif
5912 #if defined(PETSC_HAVE_HYPRE)
5913 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5914 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5915 #endif
5916 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5917 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5918 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5919 
5920 /*
5921     Computes (B'*A')' since computing B*A directly is untenable
5922 
5923                n                       p                          p
5924         (              )       (              )         (                  )
5925       m (      A       )  *  n (       B      )   =   m (         C        )
5926         (              )       (              )         (                  )
5927 
5928 */
5929 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5930 {
5931   PetscErrorCode ierr;
5932   Mat            At,Bt,Ct;
5933 
5934   PetscFunctionBegin;
5935   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5936   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5937   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5938   ierr = MatDestroy(&At);CHKERRQ(ierr);
5939   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5940   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5941   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5942   PetscFunctionReturn(0);
5943 }
5944 
5945 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5946 {
5947   PetscErrorCode ierr;
5948   PetscInt       m=A->rmap->n,n=B->cmap->n;
5949   Mat            Cmat;
5950 
5951   PetscFunctionBegin;
5952   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D\n",A->cmap->n,B->rmap->n);
5953   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5954   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5955   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5956   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5957   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5958   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5959   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5960 
5961   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5962 
5963   *C = Cmat;
5964   PetscFunctionReturn(0);
5965 }
5966 
5967 /* ----------------------------------------------------------------*/
5968 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5969 {
5970   PetscErrorCode ierr;
5971 
5972   PetscFunctionBegin;
5973   if (scall == MAT_INITIAL_MATRIX) {
5974     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5975     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5976     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5977   }
5978   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5979   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5980   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5981   PetscFunctionReturn(0);
5982 }
5983 
5984 /*MC
5985    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5986 
5987    Options Database Keys:
5988 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5989 
5990    Level: beginner
5991 
5992    Notes:
5993     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5994     in this case the values associated with the rows and columns one passes in are set to zero
5995     in the matrix.
5996 
5997     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5998     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
5999 
6000 .seealso: MatCreateAIJ()
6001 M*/
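
/*
   A sketch of the two Notes above (illustrative indices): a NULL value array records
   the nonzero locations with the value zero, and MAT_STRUCTURE_ONLY keeps only the
   pattern. The two options are independent of each other.

     PetscInt i = 0,j = 3;
     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&i,1,&j,NULL,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/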
6002 
6003 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6004 {
6005   Mat_MPIAIJ     *b;
6006   PetscErrorCode ierr;
6007   PetscMPIInt    size;
6008 
6009   PetscFunctionBegin;
6010   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6011 
6012   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6013   B->data       = (void*)b;
6014   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6015   B->assembled  = PETSC_FALSE;
6016   B->insertmode = NOT_SET_VALUES;
6017   b->size       = size;
6018 
6019   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6020 
6021   /* build cache for off-processor entries */
6022   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6023 
6024   b->donotstash  = PETSC_FALSE;
6025   b->colmap      = 0;
6026   b->garray      = 0;
6027   b->roworiented = PETSC_TRUE;
6028 
6029   /* stuff used for matrix vector multiply */
6030   b->lvec  = NULL;
6031   b->Mvctx = NULL;
6032 
6033   /* stuff for MatGetRow() */
6034   b->rowindices   = 0;
6035   b->rowvalues    = 0;
6036   b->getrowactive = PETSC_FALSE;
6037 
6038   /* flexible pointer used in CUSP/CUSPARSE classes */
6039   b->spptr = NULL;
6040 
6041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6051 #if defined(PETSC_HAVE_MKL_SPARSE)
6052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6053 #endif
6054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6057 #if defined(PETSC_HAVE_ELEMENTAL)
6058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6059 #endif
6060 #if defined(PETSC_HAVE_HYPRE)
6061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6062 #endif
6063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6068 #if defined(PETSC_HAVE_HYPRE)
6069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6070 #endif
6071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6072   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6073   PetscFunctionReturn(0);
6074 }
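
/*
   The constructor above publishes its type-specific entry points with
   PetscObjectComposeFunction(); code elsewhere dispatches on them by name. A minimal
   sketch of that dispatch (dnz/onz are placeholder preallocation arrays):

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]) = NULL;
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",&f);CHKERRQ(ierr);
     if (f) {ierr = (*f)(B,0,dnz,0,onz);CHKERRQ(ierr);}   (only AIJ-like types provide it)
*/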
6075 
6076 /*@C
6077      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6078          and "off-diagonal" part of the matrix in CSR format.
6079 
6080    Collective
6081 
6082    Input Parameters:
6083 +  comm - MPI communicator
6084 .  m - number of local rows (Cannot be PETSC_DECIDE)
6085 .  n - This value should be the same as the local size used in creating the
6086        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6087        calculated if N is given). For square matrices n is almost always m.
6088 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6089 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6090 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6091 .   j - column indices
6092 .   a - matrix values
6093 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6094 .   oj - column indices
6095 -   oa - matrix values
6096 
6097    Output Parameter:
6098 .   mat - the matrix
6099 
6100    Level: advanced
6101 
6102    Notes:
6103        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6104        must free the arrays once the matrix has been destroyed and not before.
6105 
6106        The i and j indices are 0 based
6107 
6108        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6109 
6110        This sets local rows and cannot be used to set off-processor values.
6111 
6112        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6113        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6114        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6115        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6116        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6117        communication if it is known that only local entries will be set.
6118 
6119 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6120           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6121 @*/
6122 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6123 {
6124   PetscErrorCode ierr;
6125   Mat_MPIAIJ     *maij;
6126 
6127   PetscFunctionBegin;
6128   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6129   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6130   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6131   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6132   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6133   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6134   maij = (Mat_MPIAIJ*) (*mat)->data;
6135 
6136   (*mat)->preallocated = PETSC_TRUE;
6137 
6138   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6139   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6140 
6141   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6142   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6143 
6144   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6145   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6146   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6147   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6148 
6149   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6150   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6151   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6152   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6153   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6154   PetscFunctionReturn(0);
6155 }
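
/*
   The assembly path the Notes above recommend instead of split arrays (a sketch; the
   nnz estimates d_nz/o_nz are placeholders): preallocate, insert by global indices,
   and let PETSc split the diagonal from the off-diagonal part itself.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
     ... for each locally computed entry (I,J,v):
     ierr = MatSetValues(A,1,&I,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/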
6156 
6157 /*
6158     Special version for direct calls from Fortran
6159 */
6160 #include <petsc/private/fortranimpl.h>
6161 
6162 /* Change these macros so they can be used in a void function */
6163 #undef CHKERRQ
6164 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6165 #undef SETERRQ2
6166 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6167 #undef SETERRQ3
6168 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6169 #undef SETERRQ
6170 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6171 
6172 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6173 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6174 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6175 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6176 #else
6177 #endif
6178 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6179 {
6180   Mat            mat  = *mmat;
6181   PetscInt       m    = *mm, n = *mn;
6182   InsertMode     addv = *maddv;
6183   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6184   PetscScalar    value;
6185   PetscErrorCode ierr;
6186 
6187   MatCheckPreallocated(mat,1);
6188   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6189 
6190 #if defined(PETSC_USE_DEBUG)
6191   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6192 #endif
6193   {
6194     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6195     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6196     PetscBool roworiented = aij->roworiented;
6197 
6198     /* Some Variables required in the macro */
6199     Mat        A                 = aij->A;
6200     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6201     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6202     MatScalar  *aa               = a->a;
6203     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6204     Mat        B                 = aij->B;
6205     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6206     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6207     MatScalar  *ba               = b->a;
6208 
6209     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6210     PetscInt  nonew = a->nonew;
6211     MatScalar *ap1,*ap2;
6212 
6213     PetscFunctionBegin;
6214     for (i=0; i<m; i++) {
6215       if (im[i] < 0) continue;
6216 #if defined(PETSC_USE_DEBUG)
6217       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6218 #endif
6219       if (im[i] >= rstart && im[i] < rend) {
6220         row      = im[i] - rstart;
6221         lastcol1 = -1;
6222         rp1      = aj + ai[row];
6223         ap1      = aa + ai[row];
6224         rmax1    = aimax[row];
6225         nrow1    = ailen[row];
6226         low1     = 0;
6227         high1    = nrow1;
6228         lastcol2 = -1;
6229         rp2      = bj + bi[row];
6230         ap2      = ba + bi[row];
6231         rmax2    = bimax[row];
6232         nrow2    = bilen[row];
6233         low2     = 0;
6234         high2    = nrow2;
6235 
6236         for (j=0; j<n; j++) {
6237           if (roworiented) value = v[i*n+j];
6238           else value = v[i+j*m];
6239           if (in[j] >= cstart && in[j] < cend) {
6240             col = in[j] - cstart;
6241             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6242             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6243           } else if (in[j] < 0) continue;
6244 #if defined(PETSC_USE_DEBUG)
6245           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6246           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6247 #endif
6248           else {
6249             if (mat->was_assembled) {
6250               if (!aij->colmap) {
6251                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6252               }
6253 #if defined(PETSC_USE_CTABLE)
6254               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6255               col--;
6256 #else
6257               col = aij->colmap[in[j]] - 1;
6258 #endif
6259               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6260               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6261                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6262                 col  =  in[j];
6263                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6264                 B     = aij->B;
6265                 b     = (Mat_SeqAIJ*)B->data;
6266                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6267                 rp2   = bj + bi[row];
6268                 ap2   = ba + bi[row];
6269                 rmax2 = bimax[row];
6270                 nrow2 = bilen[row];
6271                 low2  = 0;
6272                 high2 = nrow2;
6273                 bm    = aij->B->rmap->n;
6274                 ba    = b->a;
6275               }
6276             } else col = in[j];
6277             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6278           }
6279         }
6280       } else if (!aij->donotstash) {
6281         if (roworiented) {
6282           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6283         } else {
6284           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6285         }
6286       }
6287     }
6288   }
6289   PetscFunctionReturnVoid();
6290 }
6291