xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 58c0e5077dcf40d6a880c19c87f1075ae1d22c8e)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the AIJ type also automatically switches over to use inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
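/*
   A minimal usage sketch (not part of this file) of the recommended pattern above: create a MATAIJ
   matrix and call both preallocation routines so the same code works on one or on many processes.
   The global size and the per-row nonzero estimates below are made-up illustration values.

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          <-- used when the communicator has one process
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   <-- used when the communicator has several processes
*/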
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
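/*
   A short sketch (not part of this file) of selecting the aijcrl format from the options database,
   assuming the application calls MatSetFromOptions() before setting values:

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);      then run the program with:  -mat_type aijcrl
*/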
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
217   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
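/*
   A hedged caller-side sketch (not taken from an actual PETSc preconditioner): rank 0 holds a square
   SeqAIJ matrix gseq and every rank requests mloc of its rows. The names gseq and mloc are assumptions
   of this example; mloc must sum to the global row count across the communicator.

      Mat dist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mloc,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ... later, after gseq receives new numerical values with the same nonzero pattern ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mloc,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/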
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the number of diagonal and off-diagonal entries in each row */
282       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the number of diagonal and off-diagonal entries in each row */
315       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0 */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it, it is not scalable (each processor
406 has an order N integer array) but access is fast.
407 */
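/*
   For reference, a sketch of how the colmap built here is consumed elsewhere in this file
   (e.g. MatSetValues_MPIAIJ and MatGetValues_MPIAIJ): map a global column gcol to the local
   column lcol of the off-diagonal block, where lcol < 0 means "column not present locally".

   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/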
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) { \
443             ap1[_i] += value;   \
444             /* Not sure whether LogFlops will slow down the code or not */ \
445             (void)PetscLogFlops(1.0);   \
446            } \
447           else                    ap1[_i] = value; \
448           goto a_noinsert; \
449         } \
450       }  \
451       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
452       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
453       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
454       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
455       N = nrow1++ - 1; a->nz++; high1++; \
456       /* shift up all the later entries in this row */ \
457       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
458       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
459       rp1[_i] = col;  \
460       ap1[_i] = value;  \
461       A->nonzerostate++;\
462       a_noinsert: ; \
463       ailen[row] = nrow1; \
464 }
465 
466 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
467   { \
468     if (col <= lastcol2) low2 = 0;                        \
469     else high2 = nrow2;                                   \
470     lastcol2 = col;                                       \
471     while (high2-low2 > 5) {                              \
472       t = (low2+high2)/2;                                 \
473       if (rp2[t] > col) high2 = t;                        \
474       else             low2  = t;                         \
475     }                                                     \
476     for (_i=low2; _i<high2; _i++) {                       \
477       if (rp2[_i] > col) break;                           \
478       if (rp2[_i] == col) {                               \
479         if (addv == ADD_VALUES) {                         \
480           ap2[_i] += value;                               \
481           (void)PetscLogFlops(1.0);                       \
482         }                                                 \
483         else                    ap2[_i] = value;          \
484         goto b_noinsert;                                  \
485       }                                                   \
486     }                                                     \
487     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
488     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
489     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
490     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
491     N = nrow2++ - 1; b->nz++; high2++;                    \
492     /* shift up all the later entries in this row */      \
493     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
494     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
495     rp2[_i] = col;                                        \
496     ap2[_i] = value;                                      \
497     B->nonzerostate++;                                    \
498     b_noinsert: ;                                         \
499     bilen[row] = nrow2;                                   \
500   }
501 
502 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
503 {
504   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
505   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
506   PetscErrorCode ierr;
507   PetscInt       l,*garray = mat->garray,diag;
508 
509   PetscFunctionBegin;
510   /* code only works for square matrices A */
511 
512   /* find size of row to the left of the diagonal part */
513   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
514   row  = row - diag;
515   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
516     if (garray[b->j[b->i[row]+l]] > diag) break;
517   }
518   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
519 
520   /* diagonal part */
521   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
522 
523   /* right of diagonal part */
524   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
525   PetscFunctionReturn(0);
526 }
527 
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value = 0.0;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
577         if (in[j] >= cstart && in[j] < cend) {
578           col   = in[j] - cstart;
579           nonew = a->nonew;
580           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
581           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
582         } else if (in[j] < 0) continue;
583 #if defined(PETSC_USE_DEBUG)
584         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
585 #endif
586         else {
587           if (mat->was_assembled) {
588             if (!aij->colmap) {
589               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
590             }
591 #if defined(PETSC_USE_CTABLE)
592             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
593             col--;
594 #else
595             col = aij->colmap[in[j]] - 1;
596 #endif
597             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
598               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
599               col  =  in[j];
600               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
601               B     = aij->B;
602               b     = (Mat_SeqAIJ*)B->data;
603               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
604               rp2   = bj + bi[row];
605               ap2   = ba + bi[row];
606               rmax2 = bimax[row];
607               nrow2 = bilen[row];
608               low2  = 0;
609               high2 = nrow2;
610               bm    = aij->B->rmap->n;
611               ba    = b->a;
612             } else if (col < 0) {
613               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
614                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
615               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
616             }
617           } else col = in[j];
618           nonew = b->nonew;
619           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
620         }
621       }
622     } else {
623       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
624       if (!aij->donotstash) {
625         mat->assembled = PETSC_FALSE;
626         if (roworiented) {
627           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         } else {
629           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
630         }
631       }
632     }
633   }
634   PetscFunctionReturn(0);
635 }
636 
637 /*
638     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
639     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
640     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
641 */
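/*
   A made-up illustration of the expected CSR input for two local rows on a process whose diagonal
   block owns global columns [cstart,cend) = [4,8): row 0 has entries in global columns 1, 4 and 6,
   row 1 in columns 5 and 9, and the column indices are sorted within each row.

      mat_i[] = {0, 3, 5};
      mat_j[] = {1, 4, 6,   5, 9};

   Columns 4, 6 and 5 land in the diagonal part (stored shifted by cstart); columns 1 and 9 land in
   the off-diagonal part.
*/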
642 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
643 {
644   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
645   Mat            A           = aij->A; /* diagonal part of the matrix */
646   Mat            B           = aij->B; /* offdiagonal part of the matrix */
647   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
648   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
649   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
650   PetscInt       *ailen      = a->ilen,*aj = a->j;
651   PetscInt       *bilen      = b->ilen,*bj = b->j;
652   PetscInt       am          = aij->A->rmap->n,j;
653   PetscInt       diag_so_far = 0,dnz;
654   PetscInt       offd_so_far = 0,onz;
655 
656   PetscFunctionBegin;
657   /* Iterate over all rows of the matrix */
658   for (j=0; j<am; j++) {
659     dnz = onz = 0;
660     /*  Iterate over all non-zero columns of the current row */
661     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
662       /* If column is in the diagonal */
663       if (mat_j[col] >= cstart && mat_j[col] < cend) {
664         aj[diag_so_far++] = mat_j[col] - cstart;
665         dnz++;
666       } else { /* off-diagonal entries */
667         bj[offd_so_far++] = mat_j[col];
668         onz++;
669       }
670     }
671     ailen[j] = dnz;
672     bilen[j] = onz;
673   }
674   PetscFunctionReturn(0);
675 }
676 
677 /*
678     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
679     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
680     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
681     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
682     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
683 */
684 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
685 {
686   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
687   Mat            A      = aij->A; /* diagonal part of the matrix */
688   Mat            B      = aij->B; /* offdiagonal part of the matrix */
689   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
690   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
691   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
692   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
693   PetscInt       *ailen = a->ilen,*aj = a->j;
694   PetscInt       *bilen = b->ilen,*bj = b->j;
695   PetscInt       am     = aij->A->rmap->n,j;
696   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
697   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
698   PetscScalar    *aa = a->a,*ba = b->a;
699 
700   PetscFunctionBegin;
701   /* Iterate over all rows of the matrix */
702   for (j=0; j<am; j++) {
703     dnz_row = onz_row = 0;
704     rowstart_offd = full_offd_i[j];
705     rowstart_diag = full_diag_i[j];
706     /*  Iterate over all non-zero columns of the current row */
707     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
708       /* If column is in the diagonal */
709       if (mat_j[col] >= cstart && mat_j[col] < cend) {
710         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
711         aa[rowstart_diag+dnz_row] = mat_a[col];
712         dnz_row++;
713       } else { /* off-diagonal entries */
714         bj[rowstart_offd+onz_row] = mat_j[col];
715         ba[rowstart_offd+onz_row] = mat_a[col];
716         onz_row++;
717       }
718     }
719     ailen[j] = dnz_row;
720     bilen[j] = onz_row;
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
726 {
727   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
728   PetscErrorCode ierr;
729   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
730   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
731 
732   PetscFunctionBegin;
733   for (i=0; i<m; i++) {
734     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
735     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
736     if (idxm[i] >= rstart && idxm[i] < rend) {
737       row = idxm[i] - rstart;
738       for (j=0; j<n; j++) {
739         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
740         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
741         if (idxn[j] >= cstart && idxn[j] < cend) {
742           col  = idxn[j] - cstart;
743           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
744         } else {
745           if (!aij->colmap) {
746             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
747           }
748 #if defined(PETSC_USE_CTABLE)
749           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
750           col--;
751 #else
752           col = aij->colmap[idxn[j]] - 1;
753 #endif
754           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
755           else {
756             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
757           }
758         }
759       }
760     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
761   }
762   PetscFunctionReturn(0);
763 }
764 
765 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
766 
767 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
768 {
769   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
770   PetscErrorCode ierr;
771   PetscInt       nstash,reallocs;
772 
773   PetscFunctionBegin;
774   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
775 
776   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
777   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
778   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
779   PetscFunctionReturn(0);
780 }
781 
782 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
783 {
784   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
785   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
786   PetscErrorCode ierr;
787   PetscMPIInt    n;
788   PetscInt       i,j,rstart,ncols,flg;
789   PetscInt       *row,*col;
790   PetscBool      other_disassembled;
791   PetscScalar    *val;
792 
793   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
794 
795   PetscFunctionBegin;
796   if (!aij->donotstash && !mat->nooffprocentries) {
797     while (1) {
798       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
799       if (!flg) break;
800 
801       for (i=0; i<n; ) {
802         /* Now identify the consecutive vals belonging to the same row */
803         for (j=i,rstart=row[j]; j<n; j++) {
804           if (row[j] != rstart) break;
805         }
806         if (j < n) ncols = j-i;
807         else       ncols = n-i;
808         /* Now assemble all these values with a single function call */
809         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
810 
811         i = j;
812       }
813     }
814     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
815   }
816 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
817   if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU) aij->A->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
818 #endif
819   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
820   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
821 
822   /* determine if any processor has disassembled; if so, we must
823      also disassemble ourselves so that we may reassemble. */
824   /*
825      if the nonzero structure of submatrix B cannot change then we know that
826      no processor disassembled, thus we can skip this step
827   */
828   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
829     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
830     if (mat->was_assembled && !other_disassembled) {
831 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
832       aij->B->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
833 #endif
834       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
835     }
836   }
837   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
838     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
839   }
840   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
841 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
842   if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU && aij->B->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) aij->B->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
843 #endif
844   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
845   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
846 
847   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
848 
849   aij->rowvalues = 0;
850 
851   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
852   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
853 
854   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
855   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
856     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
857     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
858   }
859 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
860   mat->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
861 #endif
862   PetscFunctionReturn(0);
863 }
864 
865 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
866 {
867   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
872   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
877 {
878   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
879   PetscObjectState sA, sB;
880   PetscInt        *lrows;
881   PetscInt         r, len;
882   PetscBool        cong, lch, gch;
883   PetscErrorCode   ierr;
884 
885   PetscFunctionBegin;
886   /* get locally owned rows */
887   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
888   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
889   /* fix right hand side if needed */
890   if (x && b) {
891     const PetscScalar *xx;
892     PetscScalar       *bb;
893 
894     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
895     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
896     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
897     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
898     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
899     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
900   }
901 
902   sA = mat->A->nonzerostate;
903   sB = mat->B->nonzerostate;
904 
905   if (diag != 0.0 && cong) {
906     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
907     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
908   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
909     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
910     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
911     PetscInt   nnwA, nnwB;
912     PetscBool  nnzA, nnzB;
913 
914     nnwA = aijA->nonew;
915     nnwB = aijB->nonew;
916     nnzA = aijA->keepnonzeropattern;
917     nnzB = aijB->keepnonzeropattern;
918     if (!nnzA) {
919       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
920       aijA->nonew = 0;
921     }
922     if (!nnzB) {
923       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
924       aijB->nonew = 0;
925     }
926     /* Must zero here before the next loop */
927     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
928     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
929     for (r = 0; r < len; ++r) {
930       const PetscInt row = lrows[r] + A->rmap->rstart;
931       if (row >= A->cmap->N) continue;
932       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
933     }
934     aijA->nonew = nnwA;
935     aijB->nonew = nnwB;
936   } else {
937     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
938     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
939   }
940   ierr = PetscFree(lrows);CHKERRQ(ierr);
941   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
942   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
943 
944   /* reduce nonzerostate */
945   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
946   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   if (gch) A->nonzerostate++;
948   PetscFunctionReturn(0);
949 }
950 
951 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
952 {
953   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
954   PetscErrorCode    ierr;
955   PetscMPIInt       n = A->rmap->n;
956   PetscInt          i,j,r,m,p = 0,len = 0;
957   PetscInt          *lrows,*owners = A->rmap->range;
958   PetscSFNode       *rrows;
959   PetscSF           sf;
960   const PetscScalar *xx;
961   PetscScalar       *bb,*mask;
962   Vec               xmask,lmask;
963   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
964   const PetscInt    *aj, *ii,*ridx;
965   PetscScalar       *aa;
966 
967   PetscFunctionBegin;
968   /* Create SF where leaves are input rows and roots are owned rows */
969   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
970   for (r = 0; r < n; ++r) lrows[r] = -1;
971   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
972   for (r = 0; r < N; ++r) {
973     const PetscInt idx   = rows[r];
974     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
975     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
976       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
977     }
978     rrows[r].rank  = p;
979     rrows[r].index = rows[r] - owners[p];
980   }
981   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
982   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
983   /* Collect flags for rows to be zeroed */
984   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
985   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
986   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
987   /* Compress and put in row numbers */
988   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
991   /* handle off diagonal part of matrix */
992   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
993   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
994   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
995   for (i=0; i<len; i++) bb[lrows[i]] = 1;
996   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
997   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
999   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1000   if (x && b) { /* this code is buggy when the row and column layout don't match */
1001     PetscBool cong;
1002 
1003     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1004     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1005     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1008     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1009   }
1010   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1011   /* remove zeroed rows of off diagonal matrix */
1012   ii = aij->i;
1013   for (i=0; i<len; i++) {
1014     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1015   }
1016   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1017   if (aij->compressedrow.use) {
1018     m    = aij->compressedrow.nrows;
1019     ii   = aij->compressedrow.i;
1020     ridx = aij->compressedrow.rindex;
1021     for (i=0; i<m; i++) {
1022       n  = ii[i+1] - ii[i];
1023       aj = aij->j + ii[i];
1024       aa = aij->a + ii[i];
1025 
1026       for (j=0; j<n; j++) {
1027         if (PetscAbsScalar(mask[*aj])) {
1028           if (b) bb[*ridx] -= *aa*xx[*aj];
1029           *aa = 0.0;
1030         }
1031         aa++;
1032         aj++;
1033       }
1034       ridx++;
1035     }
1036   } else { /* do not use compressed row format */
1037     m = l->B->rmap->n;
1038     for (i=0; i<m; i++) {
1039       n  = ii[i+1] - ii[i];
1040       aj = aij->j + ii[i];
1041       aa = aij->a + ii[i];
1042       for (j=0; j<n; j++) {
1043         if (PetscAbsScalar(mask[*aj])) {
1044           if (b) bb[i] -= *aa*xx[*aj];
1045           *aa = 0.0;
1046         }
1047         aa++;
1048         aj++;
1049       }
1050     }
1051   }
1052   if (x && b) {
1053     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1054     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1055   }
1056   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1057   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1058   ierr = PetscFree(lrows);CHKERRQ(ierr);
1059 
1060   /* only change matrix nonzero state if pattern was allowed to be changed */
1061   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1062     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1063     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1069 {
1070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071   PetscErrorCode ierr;
1072   PetscInt       nt;
1073   VecScatter     Mvctx = a->Mvctx;
1074 
1075   PetscFunctionBegin;
1076   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1077   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1078 
1079   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1080   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1081   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1082   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089   PetscErrorCode ierr;
1090 
1091   PetscFunctionBegin;
1092   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1097 {
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099   PetscErrorCode ierr;
1100   VecScatter     Mvctx = a->Mvctx;
1101 
1102   PetscFunctionBegin;
1103   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1104   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1105   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1106   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1107   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1112 {
1113   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   /* do nondiagonal part */
1118   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1119   /* do local part */
1120   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1121   /* add partial results together */
1122   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1123   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1128 {
1129   MPI_Comm       comm;
1130   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1131   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1132   IS             Me,Notme;
1133   PetscErrorCode ierr;
1134   PetscInt       M,N,first,last,*notme,i;
1135   PetscBool      lf;
1136   PetscMPIInt    size;
1137 
1138   PetscFunctionBegin;
1139   /* Easy test: symmetric diagonal block */
1140   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1141   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1142   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1143   if (!*f) PetscFunctionReturn(0);
1144   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1145   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1146   if (size == 1) PetscFunctionReturn(0);
1147 
1148   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1149   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1150   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1151   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1152   for (i=0; i<first; i++) notme[i] = i;
1153   for (i=last; i<M; i++) notme[i-last+first] = i;
1154   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1155   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1156   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1157   Aoff = Aoffs[0];
1158   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1159   Boff = Boffs[0];
1160   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1161   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1162   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1163   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1164   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1165   ierr = PetscFree(notme);CHKERRQ(ierr);
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1170 {
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1179 {
1180   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184   /* do nondiagonal part */
1185   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1186   /* do local part */
1187   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1188   /* add partial results together */
1189   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1190   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 /*
1195   This only works correctly for square matrices where the subblock A->A is the
1196    diagonal block
1197 */
1198 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1199 {
1200   PetscErrorCode ierr;
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202 
1203   PetscFunctionBegin;
1204   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1205   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1206   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1211 {
1212   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1213   PetscErrorCode ierr;
1214 
1215   PetscFunctionBegin;
1216   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1217   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1222 {
1223   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1224   PetscErrorCode ierr;
1225 
1226   PetscFunctionBegin;
1227 #if defined(PETSC_USE_LOG)
1228   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1229 #endif
1230   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1231   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1232   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1233   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1234 #if defined(PETSC_USE_CTABLE)
1235   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1236 #else
1237   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1238 #endif
1239   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1240   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1241   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1242   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1243   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1244   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1245   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1246 
1247   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1250   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1252   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1253   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1254   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1255   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1256 #if defined(PETSC_HAVE_ELEMENTAL)
1257   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1258 #endif
1259 #if defined(PETSC_HAVE_HYPRE)
1260   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1261   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1262 #endif
1263   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1264   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1269 {
1270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1271   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1272   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1273   PetscErrorCode ierr;
1274   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1275   int            fd;
1276   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1277   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1278   PetscScalar    *column_values;
1279   PetscInt       message_count,flowcontrolcount;
1280   FILE           *file;
1281 
1282   PetscFunctionBegin;
1283   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1284   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1285   nz   = A->nz + B->nz;
1286   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1287   if (!rank) {
1288     header[0] = MAT_FILE_CLASSID;
1289     header[1] = mat->rmap->N;
1290     header[2] = mat->cmap->N;
1291 
1292     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1293     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1294     /* get largest number of rows any processor has */
1295     rlen  = mat->rmap->n;
1296     range = mat->rmap->range;
1297     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1298   } else {
1299     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1300     rlen = mat->rmap->n;
1301   }
1302 
1303   /* load up the local row counts */
1304   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1305   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1306 
1307   /* store the row lengths to the file */
1308   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1309   if (!rank) {
1310     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1311     for (i=1; i<size; i++) {
1312       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1313       rlen = range[i+1] - range[i];
1314       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1316     }
1317     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1318   } else {
1319     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1320     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1322   }
1323   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1324 
1325   /* load up the local column indices */
1326   nzmax = nz; /* the root process needs as much buffer space as the largest process requires */
1327   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1328   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1329   cnt   = 0;
1330   for (i=0; i<mat->rmap->n; i++) {
1331     for (j=B->i[i]; j<B->i[i+1]; j++) {
1332       if ((col = garray[B->j[j]]) > cstart) break;
1333       column_indices[cnt++] = col;
1334     }
1335     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1336     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1337   }
1338   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1339 
1340   /* store the column indices to the file */
1341   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1342   if (!rank) {
1343     MPI_Status status;
1344     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1345     for (i=1; i<size; i++) {
1346       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1347       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1348       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1349       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1350       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1351     }
1352     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1353   } else {
1354     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1355     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1357     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1358   }
1359   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1360 
1361   /* load up the local column values */
1362   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1363   cnt  = 0;
1364   for (i=0; i<mat->rmap->n; i++) {
1365     for (j=B->i[i]; j<B->i[i+1]; j++) {
1366       if (garray[B->j[j]] > cstart) break;
1367       column_values[cnt++] = B->a[j];
1368     }
1369     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1370     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1371   }
1372   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1373 
1374   /* store the column values to the file */
1375   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1376   if (!rank) {
1377     MPI_Status status;
1378     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1379     for (i=1; i<size; i++) {
1380       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1381       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1382       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1383       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1384       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1385     }
1386     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1387   } else {
1388     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1389     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1392   }
1393   ierr = PetscFree(column_values);CHKERRQ(ierr);
1394 
1395   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1396   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #include <petscdraw.h>
1401 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1402 {
1403   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1404   PetscErrorCode    ierr;
1405   PetscMPIInt       rank = aij->rank,size = aij->size;
1406   PetscBool         isdraw,iascii,isbinary;
1407   PetscViewer       sviewer;
1408   PetscViewerFormat format;
1409 
1410   PetscFunctionBegin;
1411   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1412   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1413   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1414   if (iascii) {
1415     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1416     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1417       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1418       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1419       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1420       for (i=0; i<(PetscInt)size; i++) {
1421         nmax = PetscMax(nmax,nz[i]);
1422         nmin = PetscMin(nmin,nz[i]);
1423         navg += nz[i];
1424       }
1425       ierr = PetscFree(nz);CHKERRQ(ierr);
1426       navg = navg/size;
1427       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1428       PetscFunctionReturn(0);
1429     }
1430     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1431     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1432       MatInfo   info;
1433       PetscBool inodes;
1434 
1435       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1436       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1437       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1438       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1439       if (!inodes) {
1440         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1441                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1442       } else {
1443         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1444                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1445       }
1446       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1448       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1449       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1450       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1451       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1452       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1453       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1454       PetscFunctionReturn(0);
1455     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1456       PetscInt inodecount,inodelimit,*inodes;
1457       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1458       if (inodes) {
1459         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1460       } else {
1461         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1462       }
1463       PetscFunctionReturn(0);
1464     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1465       PetscFunctionReturn(0);
1466     }
1467   } else if (isbinary) {
1468     if (size == 1) {
1469       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1470       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1471     } else {
1472       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1473     }
1474     PetscFunctionReturn(0);
1475   } else if (iascii && size == 1) {
1476     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1477     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1478     PetscFunctionReturn(0);
1479   } else if (isdraw) {
1480     PetscDraw draw;
1481     PetscBool isnull;
1482     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1483     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1484     if (isnull) PetscFunctionReturn(0);
1485   }
1486 
1487   { /* assemble the entire matrix onto first processor */
1488     Mat A = NULL, Av;
1489     IS  isrow,iscol;
1490 
1491     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1492     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1493     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1494     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1495 /*  The commented code uses MatCreateSubMatrices instead */
1496 /*
1497     Mat *AA, A = NULL, Av;
1498     IS  isrow,iscol;
1499 
1500     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1501     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1502     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1503     if (!rank) {
1504        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1505        A    = AA[0];
1506        Av   = AA[0];
1507     }
1508     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1509 */
1510     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1511     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1512     /*
1513        Everyone has to call to draw the matrix since the graphics waits are
1514        synchronized across all processors that share the PetscDraw object
1515     */
1516     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1517     if (!rank) {
1518       if (((PetscObject)mat)->name) {
1519         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1520       }
1521       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1522     }
1523     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1524     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1525     ierr = MatDestroy(&A);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1531 {
1532   PetscErrorCode ierr;
1533   PetscBool      iascii,isdraw,issocket,isbinary;
1534 
1535   PetscFunctionBegin;
1536   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1537   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1538   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1539   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1540   if (iascii || isdraw || isbinary || issocket) {
1541     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
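/*
   Locally each process stores its rows of the matrix as [A B], where A is the square diagonal
   block (columns owned by this process) and B holds the off-process columns.  The local SOR
   sweeps below therefore first gather the needed ghost values of xx into lvec with Mvctx, form
   the modified right-hand side bb1 = bb - B*lvec, and then apply the sequential SOR of the
   diagonal block A to (bb1, xx).
*/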
1546 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1547 {
1548   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1549   PetscErrorCode ierr;
1550   Vec            bb1 = 0;
1551   PetscBool      hasop;
1552 
1553   PetscFunctionBegin;
1554   if (flag == SOR_APPLY_UPPER) {
1555     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1556     PetscFunctionReturn(0);
1557   }
1558 
1559   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1560     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1561   }
1562 
1563   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1564     if (flag & SOR_ZERO_INITIAL_GUESS) {
1565       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1566       its--;
1567     }
1568 
1569     while (its--) {
1570       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1571       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572 
1573       /* update rhs: bb1 = bb - B*x */
1574       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1575       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1576 
1577       /* local sweep */
1578       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1579     }
1580   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1581     if (flag & SOR_ZERO_INITIAL_GUESS) {
1582       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1583       its--;
1584     }
1585     while (its--) {
1586       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1587       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1588 
1589       /* update rhs: bb1 = bb - B*x */
1590       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1591       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1592 
1593       /* local sweep */
1594       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1595     }
1596   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1597     if (flag & SOR_ZERO_INITIAL_GUESS) {
1598       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1599       its--;
1600     }
1601     while (its--) {
1602       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604 
1605       /* update rhs: bb1 = bb - B*x */
1606       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1607       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1608 
1609       /* local sweep */
1610       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1611     }
1612   } else if (flag & SOR_EISENSTAT) {
1613     Vec xx1;
1614 
1615     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1616     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1617 
1618     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1619     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1620     if (!mat->diag) {
1621       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1622       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1623     }
1624     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1625     if (hasop) {
1626       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1627     } else {
1628       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1629     }
1630     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1631 
1632     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1633 
1634     /* local sweep */
1635     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1636     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1637     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1638   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1639 
1640   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1641 
1642   matin->factorerrortype = mat->A->factorerrortype;
1643   PetscFunctionReturn(0);
1644 }
1645 
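/*
   MatPermute for the parallel AIJ format: star forests (PetscSF) are used to invert the row and
   column permutations so that every process learns the destination global row and column of each
   entry it owns, the diagonal/off-diagonal preallocation counts are then broadcast to the
   destination rows, and finally the values are inserted into the permuted matrix with
   MatSetValues().
*/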
1646 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1647 {
1648   Mat            aA,aB,Aperm;
1649   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1650   PetscScalar    *aa,*ba;
1651   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1652   PetscSF        rowsf,sf;
1653   IS             parcolp = NULL;
1654   PetscBool      done;
1655   PetscErrorCode ierr;
1656 
1657   PetscFunctionBegin;
1658   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1659   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1660   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1661   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1662 
1663   /* Invert row permutation to find out where my rows should go */
1664   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1665   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1666   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1667   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1668   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1669   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1670 
1671   /* Invert column permutation to find out where my columns should go */
1672   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1673   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1674   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1675   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1676   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1677   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1678   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1679 
1680   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1681   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1682   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1683 
1684   /* Find out where my gcols should go */
1685   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1686   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1687   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1688   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1689   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1690   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1691   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1692   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1693 
1694   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1695   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1696   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1697   for (i=0; i<m; i++) {
1698     PetscInt row = rdest[i],rowner;
1699     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1700     for (j=ai[i]; j<ai[i+1]; j++) {
1701       PetscInt cowner,col = cdest[aj[j]];
1702       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1703       if (rowner == cowner) dnnz[i]++;
1704       else onnz[i]++;
1705     }
1706     for (j=bi[i]; j<bi[i+1]; j++) {
1707       PetscInt cowner,col = gcdest[bj[j]];
1708       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1709       if (rowner == cowner) dnnz[i]++;
1710       else onnz[i]++;
1711     }
1712   }
1713   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1714   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1715   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1716   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1717   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1718 
1719   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1720   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1721   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1722   for (i=0; i<m; i++) {
1723     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1724     PetscInt j0,rowlen;
1725     rowlen = ai[i+1] - ai[i];
1726     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert the values in batches of at most m */
1727       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1728       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1729     }
1730     rowlen = bi[i+1] - bi[i];
1731     for (j0=j=0; j<rowlen; j0=j) {
1732       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1733       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1734     }
1735   }
1736   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1737   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1738   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1739   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1740   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1741   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1742   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1743   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1744   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1745   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1746   *B = Aperm;
1747   PetscFunctionReturn(0);
1748 }
1749 
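/*
   The "ghosts" of an MPIAIJ matrix are the global column indices of its off-process part B;
   they are kept, in column-compressed order, in aij->garray, which is what the routine below
   exposes.
*/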
1750 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1751 {
1752   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1753   PetscErrorCode ierr;
1754 
1755   PetscFunctionBegin;
1756   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1757   if (ghosts) *ghosts = aij->garray;
1758   PetscFunctionReturn(0);
1759 }
1760 
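/*
   MatGetInfo sums the local information of the diagonal block A and the off-diagonal block B;
   for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the five accumulated quantities (nz_used, nz_allocated,
   nz_unneeded, memory, mallocs) are further reduced across the communicator with MPIU_MAX or
   MPIU_SUM respectively.
*/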
1761 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1762 {
1763   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1764   Mat            A    = mat->A,B = mat->B;
1765   PetscErrorCode ierr;
1766   PetscReal      isend[5],irecv[5];
1767 
1768   PetscFunctionBegin;
1769   info->block_size = 1.0;
1770   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1771 
1772   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1773   isend[3] = info->memory;  isend[4] = info->mallocs;
1774 
1775   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1776 
1777   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1778   isend[3] += info->memory;  isend[4] += info->mallocs;
1779   if (flag == MAT_LOCAL) {
1780     info->nz_used      = isend[0];
1781     info->nz_allocated = isend[1];
1782     info->nz_unneeded  = isend[2];
1783     info->memory       = isend[3];
1784     info->mallocs      = isend[4];
1785   } else if (flag == MAT_GLOBAL_MAX) {
1786     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1787 
1788     info->nz_used      = irecv[0];
1789     info->nz_allocated = irecv[1];
1790     info->nz_unneeded  = irecv[2];
1791     info->memory       = irecv[3];
1792     info->mallocs      = irecv[4];
1793   } else if (flag == MAT_GLOBAL_SUM) {
1794     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1795 
1796     info->nz_used      = irecv[0];
1797     info->nz_allocated = irecv[1];
1798     info->nz_unneeded  = irecv[2];
1799     info->memory       = irecv[3];
1800     info->mallocs      = irecv[4];
1801   }
1802   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1803   info->fill_ratio_needed = 0;
1804   info->factor_mallocs    = 0;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1809 {
1810   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1811   PetscErrorCode ierr;
1812 
1813   PetscFunctionBegin;
1814   switch (op) {
1815   case MAT_NEW_NONZERO_LOCATIONS:
1816   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1817   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1818   case MAT_KEEP_NONZERO_PATTERN:
1819   case MAT_NEW_NONZERO_LOCATION_ERR:
1820   case MAT_USE_INODES:
1821   case MAT_IGNORE_ZERO_ENTRIES:
1822     MatCheckPreallocated(A,1);
1823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1824     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1825     break;
1826   case MAT_ROW_ORIENTED:
1827     MatCheckPreallocated(A,1);
1828     a->roworiented = flg;
1829 
1830     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1831     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1832     break;
1833   case MAT_NEW_DIAGONALS:
1834   case MAT_SORTED_FULL:
1835     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1836     break;
1837   case MAT_IGNORE_OFF_PROC_ENTRIES:
1838     a->donotstash = flg;
1839     break;
1840   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1841   case MAT_SPD:
1842   case MAT_SYMMETRIC:
1843   case MAT_STRUCTURALLY_SYMMETRIC:
1844   case MAT_HERMITIAN:
1845   case MAT_SYMMETRY_ETERNAL:
1846     break;
1847   case MAT_SUBMAT_SINGLEIS:
1848     A->submat_singleis = flg;
1849     break;
1850   case MAT_STRUCTURE_ONLY:
1851     /* The option is handled directly by MatSetOption() */
1852     break;
1853   default:
1854     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1855   }
1856   PetscFunctionReturn(0);
1857 }
1858 
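/*
   MatGetRow for the parallel format merges the requested local row of the diagonal block A
   (global columns cstart + j) with the row of the off-diagonal block B (global columns
   garray[j]), returning the column indices in increasing order; both sequential rows are
   assumed to be sorted already.
*/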
1859 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1860 {
1861   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1862   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1863   PetscErrorCode ierr;
1864   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1865   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1866   PetscInt       *cmap,*idx_p;
1867 
1868   PetscFunctionBegin;
1869   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1870   mat->getrowactive = PETSC_TRUE;
1871 
1872   if (!mat->rowvalues && (idx || v)) {
1873     /*
1874         allocate enough space to hold information from the longest row.
1875     */
1876     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1877     PetscInt   max = 1,tmp;
1878     for (i=0; i<matin->rmap->n; i++) {
1879       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1880       if (max < tmp) max = tmp;
1881     }
1882     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1883   }
1884 
1885   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1886   lrow = row - rstart;
1887 
1888   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1889   if (!v)   {pvA = 0; pvB = 0;}
1890   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1891   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1892   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1893   nztot = nzA + nzB;
1894 
1895   cmap = mat->garray;
1896   if (v  || idx) {
1897     if (nztot) {
1898       /* Sort by increasing column numbers, assuming A and B already sorted */
1899       PetscInt imark = -1;
1900       if (v) {
1901         *v = v_p = mat->rowvalues;
1902         for (i=0; i<nzB; i++) {
1903           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1904           else break;
1905         }
1906         imark = i;
1907         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1908         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1909       }
1910       if (idx) {
1911         *idx = idx_p = mat->rowindices;
1912         if (imark > -1) {
1913           for (i=0; i<imark; i++) {
1914             idx_p[i] = cmap[cworkB[i]];
1915           }
1916         } else {
1917           for (i=0; i<nzB; i++) {
1918             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1919             else break;
1920           }
1921           imark = i;
1922         }
1923         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1924         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1925       }
1926     } else {
1927       if (idx) *idx = 0;
1928       if (v)   *v   = 0;
1929     }
1930   }
1931   *nz  = nztot;
1932   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1933   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1938 {
1939   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1940 
1941   PetscFunctionBegin;
1942   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1943   aij->getrowactive = PETSC_FALSE;
1944   PetscFunctionReturn(0);
1945 }
1946 
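/*
   MatNorm for the parallel format:
     NORM_FROBENIUS: sqrt( sum_{ij} |a_ij|^2 ), accumulated locally over A and B and then summed
                     across the communicator;
     NORM_1:         max_j sum_i |a_ij|  (largest column sum, via an Allreduce of the column sums);
     NORM_INFINITY:  max_i sum_j |a_ij|  (largest row sum, via an Allreduce with MPIU_MAX).
*/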
1947 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1948 {
1949   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1950   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1951   PetscErrorCode ierr;
1952   PetscInt       i,j,cstart = mat->cmap->rstart;
1953   PetscReal      sum = 0.0;
1954   MatScalar      *v;
1955 
1956   PetscFunctionBegin;
1957   if (aij->size == 1) {
1958     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1959   } else {
1960     if (type == NORM_FROBENIUS) {
1961       v = amat->a;
1962       for (i=0; i<amat->nz; i++) {
1963         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1964       }
1965       v = bmat->a;
1966       for (i=0; i<bmat->nz; i++) {
1967         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1968       }
1969       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1970       *norm = PetscSqrtReal(*norm);
1971       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1972     } else if (type == NORM_1) { /* max column norm */
1973       PetscReal *tmp,*tmp2;
1974       PetscInt  *jj,*garray = aij->garray;
1975       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1976       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1977       *norm = 0.0;
1978       v     = amat->a; jj = amat->j;
1979       for (j=0; j<amat->nz; j++) {
1980         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1981       }
1982       v = bmat->a; jj = bmat->j;
1983       for (j=0; j<bmat->nz; j++) {
1984         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1985       }
1986       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1987       for (j=0; j<mat->cmap->N; j++) {
1988         if (tmp2[j] > *norm) *norm = tmp2[j];
1989       }
1990       ierr = PetscFree(tmp);CHKERRQ(ierr);
1991       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1992       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1993     } else if (type == NORM_INFINITY) { /* max row norm */
1994       PetscReal ntemp = 0.0;
1995       for (j=0; j<aij->A->rmap->n; j++) {
1996         v   = amat->a + amat->i[j];
1997         sum = 0.0;
1998         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1999           sum += PetscAbsScalar(*v); v++;
2000         }
2001         v = bmat->a + bmat->i[j];
2002         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2003           sum += PetscAbsScalar(*v); v++;
2004         }
2005         if (sum > ntemp) ntemp = sum;
2006       }
2007       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2008       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2009     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2010   }
2011   PetscFunctionReturn(0);
2012 }
2013 
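/*
   MatTranspose for the parallel format: the diagonal block of the result is obtained by a purely
   local MatTranspose() of a->A, while the entries of the off-diagonal block B are communicated to
   their owning processes by calling MatSetValues() with the row and column roles exchanged; the
   preallocation counts for the new matrix are computed with a PetscSF reduction over the garray
   columns.
*/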
2014 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2015 {
2016   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2017   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2018   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2019   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2020   PetscErrorCode  ierr;
2021   Mat             B,A_diag,*B_diag;
2022   const MatScalar *array;
2023 
2024   PetscFunctionBegin;
2025   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2026   ai = Aloc->i; aj = Aloc->j;
2027   bi = Bloc->i; bj = Bloc->j;
2028   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2029     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2030     PetscSFNode          *oloc;
2031     PETSC_UNUSED PetscSF sf;
2032 
2033     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2034     /* compute d_nnz for preallocation */
2035     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2036     for (i=0; i<ai[ma]; i++) {
2037       d_nnz[aj[i]]++;
2038     }
2039     /* compute local off-diagonal contributions */
2040     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2041     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2042     /* map those to global */
2043     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2044     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2045     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2046     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2047     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2048     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2049     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2050 
2051     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2052     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2053     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2054     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2055     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2056     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2057   } else {
2058     B    = *matout;
2059     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2060   }
2061 
2062   b           = (Mat_MPIAIJ*)B->data;
2063   A_diag      = a->A;
2064   B_diag      = &b->A;
2065   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2066   A_diag_ncol = A_diag->cmap->N;
2067   B_diag_ilen = sub_B_diag->ilen;
2068   B_diag_i    = sub_B_diag->i;
2069 
2070   /* Set ilen for diagonal of B */
2071   for (i=0; i<A_diag_ncol; i++) {
2072     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2073   }
2074 
2075   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2076      very quickly (i.e., without using MatSetValues), because all writes are local. */
2077   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2078 
2079   /* copy over the B part */
2080   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2081   array = Bloc->a;
2082   row   = A->rmap->rstart;
2083   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2084   cols_tmp = cols;
2085   for (i=0; i<mb; i++) {
2086     ncol = bi[i+1]-bi[i];
2087     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2088     row++;
2089     array += ncol; cols_tmp += ncol;
2090   }
2091   ierr = PetscFree(cols);CHKERRQ(ierr);
2092 
2093   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2094   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2096     *matout = B;
2097   } else {
2098     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2099   }
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2104 {
2105   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2106   Mat            a    = aij->A,b = aij->B;
2107   PetscErrorCode ierr;
2108   PetscInt       s1,s2,s3;
2109 
2110   PetscFunctionBegin;
2111   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2112   if (rr) {
2113     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2114     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2115     /* Overlap communication with computation. */
2116     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2117   }
2118   if (ll) {
2119     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2120     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2121     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2122   }
2123   /* scale  the diagonal block */
2124   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2125 
2126   if (rr) {
2127     /* Do a scatter end and then right scale the off-diagonal block */
2128     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2129     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2130   }
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2135 {
2136   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2145 {
2146   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2147   Mat            a,b,c,d;
2148   PetscBool      flg;
2149   PetscErrorCode ierr;
2150 
2151   PetscFunctionBegin;
2152   a = matA->A; b = matA->B;
2153   c = matB->A; d = matB->B;
2154 
2155   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2156   if (flg) {
2157     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2158   }
2159   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2164 {
2165   PetscErrorCode ierr;
2166   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2167   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2168 
2169   PetscFunctionBegin;
2170   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2171   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2172     /* because of the column compression in the off-processor part of the matrix a->B,
2173        the number of columns in a->B and b->B may be different, hence we cannot call
2174        the MatCopy() directly on the two parts. If need be, we can provide a more
2175        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2176        then copying the submatrices */
2177     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2178   } else {
2179     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2180     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2181   }
2182   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2187 {
2188   PetscErrorCode ierr;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 /*
2196    Computes the number of nonzeros per row needed for preallocation when X and Y
2197    have different nonzero structure.
2198 */
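/*
   A small worked example of the merge count below (added for illustration, not from the source):
   if a row of X has global columns {0,3,5} and the same row of Y has global columns {1,3,6},
   the loop counts the union {0,1,3,5,6} and sets nnz[i] = 5; the shared column 3 is counted only
   once because the duplicate is skipped.
*/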
2199 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2200 {
2201   PetscInt       i,j,k,nzx,nzy;
2202 
2203   PetscFunctionBegin;
2204   /* Set the number of nonzeros in the new matrix */
2205   for (i=0; i<m; i++) {
2206     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2207     nzx = xi[i+1] - xi[i];
2208     nzy = yi[i+1] - yi[i];
2209     nnz[i] = 0;
2210     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2211       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2212       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2213       nnz[i]++;
2214     }
2215     for (; k<nzy; k++) nnz[i]++;
2216   }
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2221 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2222 {
2223   PetscErrorCode ierr;
2224   PetscInt       m = Y->rmap->N;
2225   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2226   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2227 
2228   PetscFunctionBegin;
2229   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
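/*
   MatAXPY computes Y = a*X + Y.  When the two matrices share the same nonzero pattern the update
   is a direct BLAS axpy on the stored value arrays of the diagonal and off-diagonal blocks; when
   X's pattern is a subset of Y's the generic MatAXPY_Basic() path is used; otherwise a new matrix
   with the union nonzero pattern is preallocated (using the helper above) and then replaces Y.
*/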
2233 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2234 {
2235   PetscErrorCode ierr;
2236   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2237   PetscBLASInt   bnz,one=1;
2238   Mat_SeqAIJ     *x,*y;
2239 
2240   PetscFunctionBegin;
2241   if (str == SAME_NONZERO_PATTERN) {
2242     PetscScalar alpha = a;
2243     x    = (Mat_SeqAIJ*)xx->A->data;
2244     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2245     y    = (Mat_SeqAIJ*)yy->A->data;
2246     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2247     x    = (Mat_SeqAIJ*)xx->B->data;
2248     y    = (Mat_SeqAIJ*)yy->B->data;
2249     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2250     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2251     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2252     /* the MatAXPY_Basic* subroutines call MatAssembly, so the matrix on the GPU
2253        will be updated */
2254 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2255     if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
2256       Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
2257     }
2258 #endif
2259   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2260     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2261   } else {
2262     Mat      B;
2263     PetscInt *nnz_d,*nnz_o;
2264     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2265     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2266     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2267     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2268     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2269     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2270     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2271     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2272     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2273     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2274     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2275     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2276     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2277     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2278   }
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2283 
2284 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2285 {
2286 #if defined(PETSC_USE_COMPLEX)
2287   PetscErrorCode ierr;
2288   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2289 
2290   PetscFunctionBegin;
2291   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2292   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2293 #else
2294   PetscFunctionBegin;
2295 #endif
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2300 {
2301   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2302   PetscErrorCode ierr;
2303 
2304   PetscFunctionBegin;
2305   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2306   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2311 {
2312   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2313   PetscErrorCode ierr;
2314 
2315   PetscFunctionBegin;
2316   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2317   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2322 {
2323   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2324   PetscErrorCode ierr;
2325   PetscInt       i,*idxb = 0;
2326   PetscScalar    *va,*vb;
2327   Vec            vtmp;
2328 
2329   PetscFunctionBegin;
2330   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2331   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2332   if (idx) {
2333     for (i=0; i<A->rmap->n; i++) {
2334       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2335     }
2336   }
2337 
2338   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2339   if (idx) {
2340     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2341   }
2342   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2343   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2344 
2345   for (i=0; i<A->rmap->n; i++) {
2346     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2347       va[i] = vb[i];
2348       if (idx) idx[i] = a->garray[idxb[i]];
2349     }
2350   }
2351 
2352   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2353   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2354   ierr = PetscFree(idxb);CHKERRQ(ierr);
2355   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2360 {
2361   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2362   PetscErrorCode ierr;
2363   PetscInt       i,*idxb = 0;
2364   PetscScalar    *va,*vb;
2365   Vec            vtmp;
2366 
2367   PetscFunctionBegin;
2368   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2369   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2370   if (idx) {
2371     for (i=0; i<A->rmap->n; i++) {
2372       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2373     }
2374   }
2375 
2376   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2377   if (idx) {
2378     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2379   }
2380   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2381   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2382 
2383   for (i=0; i<A->rmap->n; i++) {
2384     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2385       va[i] = vb[i];
2386       if (idx) idx[i] = a->garray[idxb[i]];
2387     }
2388   }
2389 
2390   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2391   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2392   ierr = PetscFree(idxb);CHKERRQ(ierr);
2393   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2398 {
2399   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2400   PetscInt       n      = A->rmap->n;
2401   PetscInt       cstart = A->cmap->rstart;
2402   PetscInt       *cmap  = mat->garray;
2403   PetscInt       *diagIdx, *offdiagIdx;
2404   Vec            diagV, offdiagV;
2405   PetscScalar    *a, *diagA, *offdiagA;
2406   PetscInt       r;
2407   PetscErrorCode ierr;
2408 
2409   PetscFunctionBegin;
2410   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2411   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2412   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2413   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2414   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2415   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2416   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2417   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2418   for (r = 0; r < n; ++r) {
2419     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2420       a[r]   = diagA[r];
2421       idx[r] = cstart + diagIdx[r];
2422     } else {
2423       a[r]   = offdiagA[r];
2424       idx[r] = cmap[offdiagIdx[r]];
2425     }
2426   }
2427   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2428   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2429   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2430   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2431   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2432   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2437 {
2438   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2439   PetscInt       n      = A->rmap->n;
2440   PetscInt       cstart = A->cmap->rstart;
2441   PetscInt       *cmap  = mat->garray;
2442   PetscInt       *diagIdx, *offdiagIdx;
2443   Vec            diagV, offdiagV;
2444   PetscScalar    *a, *diagA, *offdiagA;
2445   PetscInt       r;
2446   PetscErrorCode ierr;
2447 
2448   PetscFunctionBegin;
2449   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2450   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2451   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2452   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2453   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2454   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2455   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2456   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2457   for (r = 0; r < n; ++r) {
2458     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2459       a[r]   = diagA[r];
2460       idx[r] = cstart + diagIdx[r];
2461     } else {
2462       a[r]   = offdiagA[r];
2463       idx[r] = cmap[offdiagIdx[r]];
2464     }
2465   }
2466   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2467   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2468   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2469   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2470   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2471   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2476 {
2477   PetscErrorCode ierr;
2478   Mat            *dummy;
2479 
2480   PetscFunctionBegin;
2481   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2482   *newmat = *dummy;
2483   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2488 {
2489   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2490   PetscErrorCode ierr;
2491 
2492   PetscFunctionBegin;
2493   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2494   A->factorerrortype = a->A->factorerrortype;
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2499 {
2500   PetscErrorCode ierr;
2501   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2502 
2503   PetscFunctionBegin;
2504   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2505   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2506   if (x->assembled) {
2507     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2508   } else {
2509     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2510   }
2511   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2512   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2517 {
2518   PetscFunctionBegin;
2519   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2520   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 /*@
2525    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2526 
2527    Collective on Mat
2528 
2529    Input Parameters:
2530 +    A - the matrix
2531 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is the non-scalable algorithm)
2532 
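   Options Database Keys:
.  -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap (see MatSetFromOptions_MPIAIJ() below)

   Example Usage (a minimal sketch; assumes A is an existing MATMPIAIJ matrix):
.vb
     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve
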
2533  Level: advanced
2534 
2535 @*/
2536 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2537 {
2538   PetscErrorCode       ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2546 {
2547   PetscErrorCode       ierr;
2548   PetscBool            sc = PETSC_FALSE,flg;
2549 
2550   PetscFunctionBegin;
2551   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2552   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2553   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2554   if (flg) {
2555     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2556   }
2557   ierr = PetscOptionsTail();CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2562 {
2563   PetscErrorCode ierr;
2564   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2565   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2566 
2567   PetscFunctionBegin;
2568   if (!Y->preallocated) {
2569     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2570   } else if (!aij->nz) {
2571     PetscInt nonew = aij->nonew;
2572     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2573     aij->nonew = nonew;
2574   }
2575   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2580 {
2581   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2582   PetscErrorCode ierr;
2583 
2584   PetscFunctionBegin;
2585   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2586   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2587   if (d) {
2588     PetscInt rstart;
2589     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2590     *d += rstart;
2591 
2592   }
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2597 {
2598   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2599   PetscErrorCode ierr;
2600 
2601   PetscFunctionBegin;
2602   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 /* -------------------------------------------------------------------*/
2607 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2608                                        MatGetRow_MPIAIJ,
2609                                        MatRestoreRow_MPIAIJ,
2610                                        MatMult_MPIAIJ,
2611                                 /* 4*/ MatMultAdd_MPIAIJ,
2612                                        MatMultTranspose_MPIAIJ,
2613                                        MatMultTransposeAdd_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                 /*10*/ 0,
2618                                        0,
2619                                        0,
2620                                        MatSOR_MPIAIJ,
2621                                        MatTranspose_MPIAIJ,
2622                                 /*15*/ MatGetInfo_MPIAIJ,
2623                                        MatEqual_MPIAIJ,
2624                                        MatGetDiagonal_MPIAIJ,
2625                                        MatDiagonalScale_MPIAIJ,
2626                                        MatNorm_MPIAIJ,
2627                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2628                                        MatAssemblyEnd_MPIAIJ,
2629                                        MatSetOption_MPIAIJ,
2630                                        MatZeroEntries_MPIAIJ,
2631                                 /*24*/ MatZeroRows_MPIAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*29*/ MatSetUp_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        MatGetDiagonalBlock_MPIAIJ,
2640                                        0,
2641                                 /*34*/ MatDuplicate_MPIAIJ,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                 /*39*/ MatAXPY_MPIAIJ,
2647                                        MatCreateSubMatrices_MPIAIJ,
2648                                        MatIncreaseOverlap_MPIAIJ,
2649                                        MatGetValues_MPIAIJ,
2650                                        MatCopy_MPIAIJ,
2651                                 /*44*/ MatGetRowMax_MPIAIJ,
2652                                        MatScale_MPIAIJ,
2653                                        MatShift_MPIAIJ,
2654                                        MatDiagonalSet_MPIAIJ,
2655                                        MatZeroRowsColumns_MPIAIJ,
2656                                 /*49*/ MatSetRandom_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2662                                        0,
2663                                        MatSetUnfactored_MPIAIJ,
2664                                        MatPermute_MPIAIJ,
2665                                        0,
2666                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2667                                        MatDestroy_MPIAIJ,
2668                                        MatView_MPIAIJ,
2669                                        0,
2670                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2671                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2672                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2677                                        MatGetRowMinAbs_MPIAIJ,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                 /*75*/ MatFDColoringApply_AIJ,
2683                                        MatSetFromOptions_MPIAIJ,
2684                                        0,
2685                                        0,
2686                                        MatFindZeroDiagonals_MPIAIJ,
2687                                 /*80*/ 0,
2688                                        0,
2689                                        0,
2690                                 /*83*/ MatLoad_MPIAIJ,
2691                                        MatIsSymmetric_MPIAIJ,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2697                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2698                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2699                                        MatPtAP_MPIAIJ_MPIAIJ,
2700                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2701                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                 /*99*/ 0,
2707                                        0,
2708                                        0,
2709                                        MatConjugate_MPIAIJ,
2710                                        0,
2711                                 /*104*/MatSetValuesRow_MPIAIJ,
2712                                        MatRealPart_MPIAIJ,
2713                                        MatImaginaryPart_MPIAIJ,
2714                                        0,
2715                                        0,
2716                                 /*109*/0,
2717                                        0,
2718                                        MatGetRowMin_MPIAIJ,
2719                                        0,
2720                                        MatMissingDiagonal_MPIAIJ,
2721                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2722                                        0,
2723                                        MatGetGhosts_MPIAIJ,
2724                                        0,
2725                                        0,
2726                                 /*119*/0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                        MatGetMultiProcBlock_MPIAIJ,
2731                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2732                                        MatGetColumnNorms_MPIAIJ,
2733                                        MatInvertBlockDiagonal_MPIAIJ,
2734                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2735                                        MatCreateSubMatricesMPI_MPIAIJ,
2736                                 /*129*/0,
2737                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2738                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2739                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2740                                        0,
2741                                 /*134*/0,
2742                                        0,
2743                                        MatRARt_MPIAIJ_MPIAIJ,
2744                                        0,
2745                                        0,
2746                                 /*139*/MatSetBlockSizes_MPIAIJ,
2747                                        0,
2748                                        0,
2749                                        MatFDColoringSetUp_MPIXAIJ,
2750                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2751                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2752 };
2753 
2754 /* ----------------------------------------------------------------------------------------*/
2755 
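/*
   Caller-side sketch of the public MatStoreValues()/MatRetrieveValues() pair, which dispatch to the
   routines below for MATMPIAIJ. They stash and later restore the numerical values of a matrix whose
   nonzero pattern is reused (a hedged example; it assumes mat is already assembled):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... change the numerical values of mat ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);

   The last call brings back the values saved by MatStoreValues().
*/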
2756 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2757 {
2758   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2759   PetscErrorCode ierr;
2760 
2761   PetscFunctionBegin;
2762   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2763   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2768 {
2769   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2770   PetscErrorCode ierr;
2771 
2772   PetscFunctionBegin;
2773   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2774   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2775   PetscFunctionReturn(0);
2776 }
2777 
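/*
   Caller-side sketch of the public MatMPIAIJSetPreallocation(), which dispatches to
   MatMPIAIJSetPreallocation_MPIAIJ() below (a hedged example; the per-row estimates 5 and 2 are
   illustrative only, and M, N are assumed global sizes chosen by the caller):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Here 5 is the estimated number of nonzeros per row in the diagonal block and 2 the estimate for
   the off-diagonal block; exact per-row counts may be passed through the d_nnz/o_nnz arrays instead.
*/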
2778 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2779 {
2780   Mat_MPIAIJ     *b;
2781   PetscErrorCode ierr;
2782   PetscMPIInt    size;
2783 
2784   PetscFunctionBegin;
2785   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2786   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2787   b = (Mat_MPIAIJ*)B->data;
2788 
2789 #if defined(PETSC_USE_CTABLE)
2790   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2791 #else
2792   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2793 #endif
2794   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2795   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2796   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2797 
2798   /* Because B will have been resized we simply destroy it and create a new one each time */
2799   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2800   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2801   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2802   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2803   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2804   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2805   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2806 
2807   if (!B->preallocated) {
2808     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2809     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2810     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2811     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2812     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2813   }
2814 
2815   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2816   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2817   B->preallocated  = PETSC_TRUE;
2818   B->was_assembled = PETSC_FALSE;
2819   B->assembled     = PETSC_FALSE;
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2824 {
2825   Mat_MPIAIJ     *b;
2826   PetscErrorCode ierr;
2827 
2828   PetscFunctionBegin;
2829   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2830   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2831   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2832   b = (Mat_MPIAIJ*)B->data;
2833 
2834 #if defined(PETSC_USE_CTABLE)
2835   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2836 #else
2837   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2838 #endif
2839   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2840   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2841   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2842 
2843   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2844   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2845   B->preallocated  = PETSC_TRUE;
2846   B->was_assembled = PETSC_FALSE;
2847   B->assembled = PETSC_FALSE;
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
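/*
   Caller-side sketch of the public MatDuplicate(), which dispatches to MatDuplicate_MPIAIJ() below
   for MATMPIAIJ (a hedged example; A is assumed to be an assembled MATMPIAIJ matrix):

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);           copies pattern and values
     ierr = MatDestroy(&B);CHKERRQ(ierr);
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);    copies only the nonzero pattern
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/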
2852 {
2853   Mat            mat;
2854   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2855   PetscErrorCode ierr;
2856 
2857   PetscFunctionBegin;
2858   *newmat = 0;
2859   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2860   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2861   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2862   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2863   a       = (Mat_MPIAIJ*)mat->data;
2864 
2865   mat->factortype   = matin->factortype;
2866   mat->assembled    = PETSC_TRUE;
2867   mat->insertmode   = NOT_SET_VALUES;
2868   mat->preallocated = PETSC_TRUE;
2869 
2870   a->size         = oldmat->size;
2871   a->rank         = oldmat->rank;
2872   a->donotstash   = oldmat->donotstash;
2873   a->roworiented  = oldmat->roworiented;
2874   a->rowindices   = 0;
2875   a->rowvalues    = 0;
2876   a->getrowactive = PETSC_FALSE;
2877 
2878   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2879   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2880 
2881   if (oldmat->colmap) {
2882 #if defined(PETSC_USE_CTABLE)
2883     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2884 #else
2885     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2886     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2887     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2888 #endif
2889   } else a->colmap = 0;
2890   if (oldmat->garray) {
2891     PetscInt len;
2892     len  = oldmat->B->cmap->n;
2893     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2894     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2895     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2896   } else a->garray = 0;
2897 
2898   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2899   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2900   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2901   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2902 
2903   if (oldmat->Mvctx_mpi1) {
2904     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2905     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2906   }
2907 
2908   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2909   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2910   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2911   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2912   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2913   *newmat = mat;
2914   PetscFunctionReturn(0);
2915 }
2916 
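/*
   Caller-side sketch of loading a MATMPIAIJ matrix through the public MatLoad(), which dispatches to
   MatLoad_MPIAIJ() below (a hedged example; "matrix.dat" is a placeholder binary file name):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/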
2917 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2918 {
2919   PetscBool      isbinary, ishdf5;
2920   PetscErrorCode ierr;
2921 
2922   PetscFunctionBegin;
2923   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2924   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2925   /* force binary viewer to load .info file if it has not yet done so */
2926   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2927   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2928   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2929   if (isbinary) {
2930     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2931   } else if (ishdf5) {
2932 #if defined(PETSC_HAVE_HDF5)
2933     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2934 #else
2935     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2936 #endif
2937   } else {
2938     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2939   }
2940   PetscFunctionReturn(0);
2941 }
2942 
2943 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2944 {
2945   PetscScalar    *vals,*svals;
2946   MPI_Comm       comm;
2947   PetscErrorCode ierr;
2948   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2949   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2950   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2951   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2952   PetscInt       cend,cstart,n,*rowners;
2953   int            fd;
2954   PetscInt       bs = newMat->rmap->bs;
2955 
2956   PetscFunctionBegin;
2957   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2958   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2959   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2960   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2961   if (!rank) {
2962     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2963     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2963     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2964     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2965   }
2966 
2967   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2968   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2969   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2970   if (bs < 0) bs = 1;
2971 
2972   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2973   M    = header[1]; N = header[2];
2974 
2975   /* If global sizes are set, check if they are consistent with that given in the file */
2976   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2977   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2978 
2979   /* determine ownership of all (block) rows */
2980   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2981   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2982   else m = newMat->rmap->n; /* Set by user */
2983 
2984   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2985   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2986 
2987   /* First process needs enough room for process with most rows */
2988   if (!rank) {
2989     mmax = rowners[1];
2990     for (i=2; i<=size; i++) {
2991       mmax = PetscMax(mmax, rowners[i]);
2992     }
2993   } else mmax = -1;             /* unused, but compilers complain */
2994 
2995   rowners[0] = 0;
2996   for (i=2; i<=size; i++) {
2997     rowners[i] += rowners[i-1];
2998   }
2999   rstart = rowners[rank];
3000   rend   = rowners[rank+1];
3001 
3002   /* distribute row lengths to all processors */
3003   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3004   if (!rank) {
3005     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3006     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3007     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3008     for (j=0; j<m; j++) {
3009       procsnz[0] += ourlens[j];
3010     }
3011     for (i=1; i<size; i++) {
3012       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3013       /* calculate the number of nonzeros on each processor */
3014       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3015         procsnz[i] += rowlengths[j];
3016       }
3017       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3018     }
3019     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3020   } else {
3021     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3022   }
3023 
3024   if (!rank) {
3025     /* determine max buffer needed and allocate it */
3026     maxnz = 0;
3027     for (i=0; i<size; i++) {
3028       maxnz = PetscMax(maxnz,procsnz[i]);
3029     }
3030     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3031 
3032     /* read in my part of the matrix column indices  */
3033     nz   = procsnz[0];
3034     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3035     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3036 
3037     /* read in everyone else's column indices and ship them off */
3038     for (i=1; i<size; i++) {
3039       nz   = procsnz[i];
3040       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3041       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3042     }
3043     ierr = PetscFree(cols);CHKERRQ(ierr);
3044   } else {
3045     /* determine buffer space needed for message */
3046     nz = 0;
3047     for (i=0; i<m; i++) {
3048       nz += ourlens[i];
3049     }
3050     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3051 
3052     /* receive message of column indices */
3053     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3054   }
3055 
3056   /* determine column ownership if matrix is not square */
3057   if (N != M) {
3058     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3059     else n = newMat->cmap->n;
3060     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3061     cstart = cend - n;
3062   } else {
3063     cstart = rstart;
3064     cend   = rend;
3065     n      = cend - cstart;
3066   }
3067 
3068   /* loop over local rows, determining number of off-diagonal entries */
3069   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3070   jj   = 0;
3071   for (i=0; i<m; i++) {
3072     for (j=0; j<ourlens[i]; j++) {
3073       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3074       jj++;
3075     }
3076   }
3077 
3078   for (i=0; i<m; i++) {
3079     ourlens[i] -= offlens[i];
3080   }
3081   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3082 
3083   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3084 
3085   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3086 
3087   for (i=0; i<m; i++) {
3088     ourlens[i] += offlens[i];
3089   }
3090 
3091   if (!rank) {
3092     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3093 
3094     /* read in my part of the matrix numerical values  */
3095     nz   = procsnz[0];
3096     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3097 
3098     /* insert into matrix */
3099     jj      = rstart;
3100     smycols = mycols;
3101     svals   = vals;
3102     for (i=0; i<m; i++) {
3103       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3104       smycols += ourlens[i];
3105       svals   += ourlens[i];
3106       jj++;
3107     }
3108 
3109     /* read in the other processes' values and ship them out */
3110     for (i=1; i<size; i++) {
3111       nz   = procsnz[i];
3112       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3113       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3114     }
3115     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3116   } else {
3117     /* receive numeric values */
3118     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3119 
3120     /* receive message of values */
3121     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3122 
3123     /* insert into matrix */
3124     jj      = rstart;
3125     smycols = mycols;
3126     svals   = vals;
3127     for (i=0; i<m; i++) {
3128       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3129       smycols += ourlens[i];
3130       svals   += ourlens[i];
3131       jj++;
3132     }
3133   }
3134   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3135   ierr = PetscFree(vals);CHKERRQ(ierr);
3136   ierr = PetscFree(mycols);CHKERRQ(ierr);
3137   ierr = PetscFree(rowners);CHKERRQ(ierr);
3138   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3139   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3140   PetscFunctionReturn(0);
3141 }
3142 
3143 /* Not scalable because of ISAllGather() unless getting all columns. */
3144 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3145 {
3146   PetscErrorCode ierr;
3147   IS             iscol_local;
3148   PetscBool      isstride;
3149   PetscMPIInt    lisstride=0,gisstride;
3150 
3151   PetscFunctionBegin;
3152   /* check if we are grabbing all columns */
3153   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3154 
3155   if (isstride) {
3156     PetscInt  start,len,mstart,mlen;
3157     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3158     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3159     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3160     if (mstart == start && mlen-mstart == len) lisstride = 1;
3161   }
3162 
3163   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3164   if (gisstride) {
3165     PetscInt N;
3166     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3167     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3168     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3169     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3170   } else {
3171     PetscInt cbs;
3172     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3173     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3174     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3175   }
3176 
3177   *isseq = iscol_local;
3178   PetscFunctionReturn(0);
3179 }
3180 
3181 /*
3182  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3183  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3184 
3185  Input Parameters:
3186    mat - matrix
3187    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3188            i.e., mat->rstart <= isrow[i] < mat->rend
3189    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3190            i.e., mat->cstart <= iscol[i] < mat->cend
3191  Output Parameters:
3192    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3193    iscol_o - sequential column index set for retrieving mat->B
3194    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3195  */
3196 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3197 {
3198   PetscErrorCode ierr;
3199   Vec            x,cmap;
3200   const PetscInt *is_idx;
3201   PetscScalar    *xarray,*cmaparray;
3202   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3203   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3204   Mat            B=a->B;
3205   Vec            lvec=a->lvec,lcmap;
3206   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3207   MPI_Comm       comm;
3208   VecScatter     Mvctx=a->Mvctx;
3209 
3210   PetscFunctionBegin;
3211   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3212   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3213 
3214   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3215   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3216   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3217   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3218   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3219 
3220   /* Get start indices */
3221   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3222   isstart -= ncols;
3223   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3224 
3225   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3226   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3227   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3228   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3229   for (i=0; i<ncols; i++) {
3230     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3231     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3232     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3233   }
3234   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3235   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3236   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3237 
3238   /* Get iscol_d */
3239   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3240   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3241   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3242 
3243   /* Get isrow_d */
3244   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3245   rstart = mat->rmap->rstart;
3246   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3247   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3248   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3249   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3250 
3251   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3252   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3253   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3254 
3255   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3256   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3257   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3258 
3259   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3260 
3261   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3262   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3263 
3264   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3265   /* off-process column indices */
3266   count = 0;
3267   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3268   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3269 
3270   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3271   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3272   for (i=0; i<Bn; i++) {
3273     if (PetscRealPart(xarray[i]) > -1.0) {
3274       idx[count]     = i;                   /* local column index in off-diagonal part B */
3275       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3276       count++;
3277     }
3278   }
3279   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3280   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3281 
3282   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3283   /* cannot ensure iscol_o has same blocksize as iscol! */
3284 
3285   ierr = PetscFree(idx);CHKERRQ(ierr);
3286   *garray = cmap1;
3287 
3288   ierr = VecDestroy(&x);CHKERRQ(ierr);
3289   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3290   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3291   PetscFunctionReturn(0);
3292 }
3293 
3294 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3295 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3296 {
3297   PetscErrorCode ierr;
3298   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3299   Mat            M = NULL;
3300   MPI_Comm       comm;
3301   IS             iscol_d,isrow_d,iscol_o;
3302   Mat            Asub = NULL,Bsub = NULL;
3303   PetscInt       n;
3304 
3305   PetscFunctionBegin;
3306   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3307 
3308   if (call == MAT_REUSE_MATRIX) {
3309     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3310     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3311     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3312 
3313     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3314     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3315 
3316     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3317     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3318 
3319     /* Update diagonal and off-diagonal portions of submat */
3320     asub = (Mat_MPIAIJ*)(*submat)->data;
3321     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3322     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3323     if (n) {
3324       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3325     }
3326     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328 
3329   } else { /* call == MAT_INITIAL_MATRIX */
3330     const PetscInt *garray;
3331     PetscInt        BsubN;
3332 
3333     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3334     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3335 
3336     /* Create local submatrices Asub and Bsub */
3337     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3338     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3339 
3340     /* Create submatrix M */
3341     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3342 
3343     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3344     asub = (Mat_MPIAIJ*)M->data;
3345 
3346     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3347     n = asub->B->cmap->N;
3348     if (BsubN > n) {
3349       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3350       const PetscInt *idx;
3351       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3352       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3353 
3354       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3355       j = 0;
3356       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3357       for (i=0; i<n; i++) {
3358         if (j >= BsubN) break;
3359         while (subgarray[i] > garray[j]) j++;
3360 
3361         if (subgarray[i] == garray[j]) {
3362           idx_new[i] = idx[j++];
3363         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3364       }
3365       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3366 
3367       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3368       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3369 
3370     } else if (BsubN < n) {
3371       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Number of columns of Bsub (%D) cannot be smaller than that of B (%D)",BsubN,asub->B->cmap->N);
3372     }
3373 
3374     ierr = PetscFree(garray);CHKERRQ(ierr);
3375     *submat = M;
3376 
3377     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3378     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3379     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3380 
3381     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3382     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3383 
3384     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3385     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3386   }
3387   PetscFunctionReturn(0);
3388 }
3389 
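/*
   Caller-side sketch of the public MatCreateSubMatrix(), which dispatches to
   MatCreateSubMatrix_MPIAIJ() below for MATMPIAIJ (a hedged example; here isrow and iscol simply
   select all locally owned rows and columns, so the checks below detect matching row and column
   distributions):

     IS       isrow,iscol;
     Mat      sub;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);   after the values of mat change
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/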
3390 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3391 {
3392   PetscErrorCode ierr;
3393   IS             iscol_local=NULL,isrow_d;
3394   PetscInt       csize;
3395   PetscInt       n,i,j,start,end;
3396   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3397   MPI_Comm       comm;
3398 
3399   PetscFunctionBegin;
3400   /* If isrow has same processor distribution as mat,
3401      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3402   if (call == MAT_REUSE_MATRIX) {
3403     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3404     if (isrow_d) {
3405       sameRowDist  = PETSC_TRUE;
3406       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3407     } else {
3408       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3409       if (iscol_local) {
3410         sameRowDist  = PETSC_TRUE;
3411         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3412       }
3413     }
3414   } else {
3415     /* Check if isrow has same processor distribution as mat */
3416     sameDist[0] = PETSC_FALSE;
3417     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3418     if (!n) {
3419       sameDist[0] = PETSC_TRUE;
3420     } else {
3421       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3422       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3423       if (i >= start && j < end) {
3424         sameDist[0] = PETSC_TRUE;
3425       }
3426     }
3427 
3428     /* Check if iscol has same processor distribution as mat */
3429     sameDist[1] = PETSC_FALSE;
3430     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3431     if (!n) {
3432       sameDist[1] = PETSC_TRUE;
3433     } else {
3434       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3435       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3436       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3437     }
3438 
3439     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3440     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3441     sameRowDist = tsameDist[0];
3442   }
3443 
3444   if (sameRowDist) {
3445     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3446       /* isrow and iscol have same processor distribution as mat */
3447       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3448       PetscFunctionReturn(0);
3449     } else { /* sameRowDist */
3450       /* isrow has same processor distribution as mat */
3451       if (call == MAT_INITIAL_MATRIX) {
3452         PetscBool sorted;
3453         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3454         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3455         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3456         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local size of iscol_local %D != global size of iscol %D",n,i);
3457 
3458         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3459         if (sorted) {
3460           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3461           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3462           PetscFunctionReturn(0);
3463         }
3464       } else { /* call == MAT_REUSE_MATRIX */
3465         IS    iscol_sub;
3466         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3467         if (iscol_sub) {
3468           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3469           PetscFunctionReturn(0);
3470         }
3471       }
3472     }
3473   }
3474 
3475   /* General case: iscol -> iscol_local which has global size of iscol */
3476   if (call == MAT_REUSE_MATRIX) {
3477     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3478     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3479   } else {
3480     if (!iscol_local) {
3481       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3482     }
3483   }
3484 
3485   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3486   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3487 
3488   if (call == MAT_INITIAL_MATRIX) {
3489     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3490     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3491   }
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 /*@C
3496      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3497          and "off-diagonal" parts of the matrix in CSR format.
3498 
3499    Collective
3500 
3501    Input Parameters:
3502 +  comm - MPI communicator
3503 .  A - "diagonal" portion of matrix
3504 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3505 -  garray - global index of B columns
3506 
3507    Output Parameter:
3508 .   mat - the matrix, with input A as its local diagonal matrix

3509    Level: advanced
3510 
3511    Notes:
3512        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3513        A becomes part of the output mat and B is destroyed by this routine; the caller may not use A or B after this call.
3514 
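   Example Usage (a schematic sketch; Aloc and Bloc are assumed to be previously created MATSEQAIJ
   matrices on each process, and g an array giving the global column index of each column of Bloc):
.vb
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,g,&C);CHKERRQ(ierr);
.ve
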
3515 .seealso: MatCreateMPIAIJWithSplitArrays()
3516 @*/
3517 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3518 {
3519   PetscErrorCode ierr;
3520   Mat_MPIAIJ     *maij;
3521   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3522   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3523   PetscScalar    *oa=b->a;
3524   Mat            Bnew;
3525   PetscInt       m,n,N;
3526 
3527   PetscFunctionBegin;
3528   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3529   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3530   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3531   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3532   /* the check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3533   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3534 
3535   /* Get global columns of mat */
3536   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3537 
3538   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3539   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3540   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3541   maij = (Mat_MPIAIJ*)(*mat)->data;
3542 
3543   (*mat)->preallocated = PETSC_TRUE;
3544 
3545   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3546   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3547 
3548   /* Set A as diagonal portion of *mat */
3549   maij->A = A;
3550 
3551   nz = oi[m];
3552   for (i=0; i<nz; i++) {
3553     col   = oj[i];
3554     oj[i] = garray[col];
3555   }
3556 
3557    /* Set Bnew as off-diagonal portion of *mat */
3558   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3559   bnew        = (Mat_SeqAIJ*)Bnew->data;
3560   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3561   maij->B     = Bnew;
3562 
3563   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3564 
3565   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3566   b->free_a       = PETSC_FALSE;
3567   b->free_ij      = PETSC_FALSE;
3568   ierr = MatDestroy(&B);CHKERRQ(ierr);
3569 
3570   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3571   bnew->free_a       = PETSC_TRUE;
3572   bnew->free_ij      = PETSC_TRUE;
3573 
3574   /* condense columns of maij->B */
3575   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3576   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3577   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3578   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3579   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3580   PetscFunctionReturn(0);
3581 }
3582 
3583 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3584 
3585 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3586 {
3587   PetscErrorCode ierr;
3588   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3589   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3590   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3591   Mat            M,Msub,B=a->B;
3592   MatScalar      *aa;
3593   Mat_SeqAIJ     *aij;
3594   PetscInt       *garray = a->garray,*colsub,Ncols;
3595   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3596   IS             iscol_sub,iscmap;
3597   const PetscInt *is_idx,*cmap;
3598   PetscBool      allcolumns=PETSC_FALSE;
3599   MPI_Comm       comm;
3600 
3601   PetscFunctionBegin;
3602   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3603 
3604   if (call == MAT_REUSE_MATRIX) {
3605     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3606     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3607     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3608 
3609     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3610     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3611 
3612     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3613     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3614 
3615     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3616 
3617   } else { /* call == MAT_INITIAL_MATRIX */
3618     PetscBool flg;
3619 
3620     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3621     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3622 
3623     /* (1) iscol -> nonscalable iscol_local */
3624     /* Check for special case: each processor gets entire matrix columns */
3625     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3626     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3627     if (allcolumns) {
3628       iscol_sub = iscol_local;
3629       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3630       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3631 
3632     } else {
3633       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3634       PetscInt *idx,*cmap1,k;
3635       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3636       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3637       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3638       count = 0;
3639       k     = 0;
3640       for (i=0; i<Ncols; i++) {
3641         j = is_idx[i];
3642         if (j >= cstart && j < cend) {
3643           /* diagonal part of mat */
3644           idx[count]     = j;
3645           cmap1[count++] = i; /* column index in submat */
3646         } else if (Bn) {
3647           /* off-diagonal part of mat */
3648           if (j == garray[k]) {
3649             idx[count]     = j;
3650             cmap1[count++] = i;  /* column index in submat */
3651           } else if (j > garray[k]) {
3652             while (j > garray[k] && k < Bn-1) k++;
3653             if (j == garray[k]) {
3654               idx[count]     = j;
3655               cmap1[count++] = i; /* column index in submat */
3656             }
3657           }
3658         }
3659       }
3660       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3661 
3662       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3663       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3664       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3665 
3666       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3667     }
3668 
3669     /* (3) Create sequential Msub */
3670     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3671   }
3672 
3673   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3674   aij  = (Mat_SeqAIJ*)(Msub)->data;
3675   ii   = aij->i;
3676   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3677 
3678   /*
3679       m - number of local rows
3680       Ncols - number of columns (same on all processors)
3681       rstart - first row in new global matrix generated
3682   */
3683   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3684 
3685   if (call == MAT_INITIAL_MATRIX) {
3686     /* (4) Create parallel newmat */
3687     PetscMPIInt    rank,size;
3688     PetscInt       csize;
3689 
3690     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3691     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3692 
3693     /*
3694         Determine the number of non-zeros in the diagonal and off-diagonal
3695         portions of the matrix in order to do correct preallocation
3696     */
3697 
3698     /* first get start and end of "diagonal" columns */
3699     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3700     if (csize == PETSC_DECIDE) {
3701       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3702       if (mglobal == Ncols) { /* square matrix */
3703         nlocal = m;
3704       } else {
3705         nlocal = Ncols/size + ((Ncols % size) > rank);
3706       }
3707     } else {
3708       nlocal = csize;
3709     }
3710     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3711     rstart = rend - nlocal;
3712     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3713 
3714     /* next, compute all the lengths */
3715     jj    = aij->j;
3716     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3717     olens = dlens + m;
3718     for (i=0; i<m; i++) {
3719       jend = ii[i+1] - ii[i];
3720       olen = 0;
3721       dlen = 0;
3722       for (j=0; j<jend; j++) {
3723         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3724         else dlen++;
3725         jj++;
3726       }
3727       olens[i] = olen;
3728       dlens[i] = dlen;
3729     }
3730 
3731     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3732     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3733 
3734     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3735     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3736     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3737     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3738     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3739     ierr = PetscFree(dlens);CHKERRQ(ierr);
3740 
3741   } else { /* call == MAT_REUSE_MATRIX */
3742     M    = *newmat;
3743     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3744     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3745     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3746     /*
3747          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3748        rather than the slower MatSetValues().
3749     */
3750     M->was_assembled = PETSC_TRUE;
3751     M->assembled     = PETSC_FALSE;
3752   }
3753 
3754   /* (5) Set values of Msub to *newmat */
3755   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3756   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3757 
3758   jj   = aij->j;
3759   aa   = aij->a;
3760   for (i=0; i<m; i++) {
3761     row = rstart + i;
3762     nz  = ii[i+1] - ii[i];
3763     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3764     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3765     jj += nz; aa += nz;
3766   }
3767   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3768 
3769   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3770   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3771 
3772   ierr = PetscFree(colsub);CHKERRQ(ierr);
3773 
3774   /* save Msub, iscol_sub and iscmap used in processor for next request */
3775   if (call ==  MAT_INITIAL_MATRIX) {
3776     *newmat = M;
3777     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3778     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3779 
3780     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3781     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3782 
3783     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3784     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3785 
3786     if (iscol_local) {
3787       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3788       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3789     }
3790   }
3791   PetscFunctionReturn(0);
3792 }
3793 
3794 /*
3795     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3796   matrix, and then the final result by concatenating the local matrices.
3797   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3798 
3799   Note: This requires a sequential iscol with all indices.
3800 */
3801 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3802 {
3803   PetscErrorCode ierr;
3804   PetscMPIInt    rank,size;
3805   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3806   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3807   Mat            M,Mreuse;
3808   MatScalar      *aa,*vwork;
3809   MPI_Comm       comm;
3810   Mat_SeqAIJ     *aij;
3811   PetscBool      colflag,allcolumns=PETSC_FALSE;
3812 
3813   PetscFunctionBegin;
3814   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3815   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3816   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3817 
3818   /* Check for special case: each processor gets entire matrix columns */
3819   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3820   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3821   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3822 
3823   if (call ==  MAT_REUSE_MATRIX) {
3824     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3825     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3826     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3827   } else {
3828     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3829   }
3830 
3831   /*
3832       m - number of local rows
3833       n - number of columns (same on all processors)
3834       rstart - first row in new global matrix generated
3835   */
3836   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3837   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3838   if (call == MAT_INITIAL_MATRIX) {
3839     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3840     ii  = aij->i;
3841     jj  = aij->j;
3842 
3843     /*
3844         Determine the number of non-zeros in the diagonal and off-diagonal
3845         portions of the matrix in order to do correct preallocation
3846     */
3847 
3848     /* first get start and end of "diagonal" columns */
3849     if (csize == PETSC_DECIDE) {
3850       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3851       if (mglobal == n) { /* square matrix */
3852         nlocal = m;
3853       } else {
3854         nlocal = n/size + ((n % size) > rank);
3855       }
3856     } else {
3857       nlocal = csize;
3858     }
3859     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3860     rstart = rend - nlocal;
3861     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3862 
3863     /* next, compute all the lengths */
3864     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3865     olens = dlens + m;
3866     for (i=0; i<m; i++) {
3867       jend = ii[i+1] - ii[i];
3868       olen = 0;
3869       dlen = 0;
3870       for (j=0; j<jend; j++) {
3871         if (*jj < rstart || *jj >= rend) olen++;
3872         else dlen++;
3873         jj++;
3874       }
3875       olens[i] = olen;
3876       dlens[i] = dlen;
3877     }
3878     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3879     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3880     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3881     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3882     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3883     ierr = PetscFree(dlens);CHKERRQ(ierr);
3884   } else {
3885     PetscInt ml,nl;
3886 
3887     M    = *newmat;
3888     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3889     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3890     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3891     /*
3892          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3893        rather than the slower MatSetValues().
3894     */
3895     M->was_assembled = PETSC_TRUE;
3896     M->assembled     = PETSC_FALSE;
3897   }
3898   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3899   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3900   ii   = aij->i;
3901   jj   = aij->j;
3902   aa   = aij->a;
3903   for (i=0; i<m; i++) {
3904     row   = rstart + i;
3905     nz    = ii[i+1] - ii[i];
3906     cwork = jj;     jj += nz;
3907     vwork = aa;     aa += nz;
3908     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3909   }
3910 
3911   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3912   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3913   *newmat = M;
3914 
3915   /* save submatrix used in processor for next request */
3916   if (call ==  MAT_INITIAL_MATRIX) {
3917     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3918     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3919   }
3920   PetscFunctionReturn(0);
3921 }
3922 
3923 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3924 {
3925   PetscInt       m,cstart, cend,j,nnz,i,d;
3926   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3927   const PetscInt *JJ;
3928   PetscErrorCode ierr;
3929   PetscBool      nooffprocentries;
3930 
3931   PetscFunctionBegin;
3932   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3933 
3934   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3935   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3936   m      = B->rmap->n;
3937   cstart = B->cmap->rstart;
3938   cend   = B->cmap->rend;
3939   rstart = B->rmap->rstart;
3940 
3941   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3942 
3943 #if defined(PETSC_USE_DEBUG)
3944   for (i=0; i<m; i++) {
3945     nnz = Ii[i+1]- Ii[i];
3946     JJ  = J + Ii[i];
3947     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3948     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3949     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3950   }
3951 #endif
3952 
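  /* For each local row, count the entries whose global column index lies in the
     diagonal block [cstart,cend) versus the remaining (off-diagonal) entries, so
     that the preallocation below is exact */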
3953   for (i=0; i<m; i++) {
3954     nnz     = Ii[i+1]- Ii[i];
3955     JJ      = J + Ii[i];
3956     nnz_max = PetscMax(nnz_max,nnz);
3957     d       = 0;
3958     for (j=0; j<nnz; j++) {
3959       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3960     }
3961     d_nnz[i] = d;
3962     o_nnz[i] = nnz - d;
3963   }
3964   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3965   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3966 
3967   for (i=0; i<m; i++) {
3968     ii   = i + rstart;
3969     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3970   }
3971   nooffprocentries    = B->nooffprocentries;
3972   B->nooffprocentries = PETSC_TRUE;
3973   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3974   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975   B->nooffprocentries = nooffprocentries;
3976 
3977   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3978   PetscFunctionReturn(0);
3979 }
3980 
3981 /*@
3982    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3983    (the default parallel PETSc format).
3984 
3985    Collective
3986 
3987    Input Parameters:
3988 +  B - the matrix
3989 .  i - the indices into j for the start of each local row (starts with zero)
3990 .  j - the column indices for each local row (starts with zero)
3991 -  v - optional values in the matrix
3992 
3993    Level: developer
3994 
3995    Notes:
3996        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3997      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3998      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3999 
4000        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4001 
4002        The format used for the sparse matrix input is equivalent to a
4003     row-major ordering, i.e. for the following matrix, the input data expected is
4004     as shown:
4005 
4006 $        1 0 0
4007 $        2 0 3     P0
4008 $       -------
4009 $        4 5 6     P1
4010 $
4011 $     Process0 [P0]: rows_owned=[0,1]
4012 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4013 $        j =  {0,0,2}  [size = 3]
4014 $        v =  {1,2,3}  [size = 3]
4015 $
4016 $     Process1 [P1]: rows_owned=[2]
4017 $        i =  {0,3}    [size = nrow+1  = 1+1]
4018 $        j =  {0,1,2}  [size = 3]
4019 $        v =  {4,5,6}  [size = 3]
4020 
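     As a minimal calling sketch (illustrative only; this is the call made on P0 above,
     assuming B has already been given its local sizes and the MATMPIAIJ type, and
     error checking is omitted):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
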
4021 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4022           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4023 @*/
4024 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4025 {
4026   PetscErrorCode ierr;
4027 
4028   PetscFunctionBegin;
4029   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4030   PetscFunctionReturn(0);
4031 }
4032 
4033 /*@C
4034    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4035    (the default parallel PETSc format).  For good matrix assembly performance
4036    the user should preallocate the matrix storage by setting the parameters
4037    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4038    performance can be increased by more than a factor of 50.
4039 
4040    Collective
4041 
4042    Input Parameters:
4043 +  B - the matrix
4044 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4045            (same value is used for all local rows)
4046 .  d_nnz - array containing the number of nonzeros in the various rows of the
4047            DIAGONAL portion of the local submatrix (possibly different for each row)
4048            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4049            The size of this array is equal to the number of local rows, i.e 'm'.
4050            For matrices that will be factored, you must leave room for (and set)
4051            the diagonal entry even if it is zero.
4052 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4053            submatrix (same value is used for all local rows).
4054 -  o_nnz - array containing the number of nonzeros in the various rows of the
4055            OFF-DIAGONAL portion of the local submatrix (possibly different for
4056            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4057            structure. The size of this array is equal to the number
4058            of local rows, i.e 'm'.
4059 
4060    If the *_nnz parameter is given then the *_nz parameter is ignored
4061 
4062    The AIJ format (also called the Yale sparse matrix format or
4063    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4064    storage.  The stored row and column indices begin with zero.
4065    See Users-Manual: ch_mat for details.
4066 
4067    The parallel matrix is partitioned such that the first m0 rows belong to
4068    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4069    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4070 
4071    The DIAGONAL portion of the local submatrix of a processor can be defined
4072    as the submatrix which is obtained by extracting the part corresponding to
4073    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4074    first row that belongs to the processor, r2 is the last row belonging to
4075    this processor, and c1-c2 is the range of indices of the local part of a
4076    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4077    common case of a square matrix, the row and column ranges are the same and
4078    the DIAGONAL part is also square. The remaining portion of the local
4079    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4080 
4081    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4082 
4083    You can call MatGetInfo() to get information on how effective the preallocation was;
4084    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4085    You can also run with the option -info and look for messages with the string
4086    malloc in them to see if additional memory allocation was needed.
4087 
4088    Example usage:
4089 
4090    Consider the following 8x8 matrix with 34 non-zero values, that is
4091    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4092    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4093    as follows:
4094 
4095 .vb
4096             1  2  0  |  0  3  0  |  0  4
4097     Proc0   0  5  6  |  7  0  0  |  8  0
4098             9  0 10  | 11  0  0  | 12  0
4099     -------------------------------------
4100            13  0 14  | 15 16 17  |  0  0
4101     Proc1   0 18  0  | 19 20 21  |  0  0
4102             0  0  0  | 22 23  0  | 24  0
4103     -------------------------------------
4104     Proc2  25 26 27  |  0  0 28  | 29  0
4105            30  0  0  | 31 32 33  |  0 34
4106 .ve
4107 
4108    This can be represented as a collection of submatrices as:
4109 
4110 .vb
4111       A B C
4112       D E F
4113       G H I
4114 .ve
4115 
4116    Where the submatrices A,B,C are owned by proc0, D,E,F are
4117    owned by proc1, G,H,I are owned by proc2.
4118 
4119    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4120    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4121    The 'M','N' parameters are 8,8, and have the same values on all procs.
4122 
4123    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4124    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4125    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4126    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4127    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
4128    matrix, and [DF] as another SeqAIJ matrix.
4129 
4130    When d_nz, o_nz parameters are specified, d_nz storage elements are
4131    allocated for every row of the local diagonal submatrix, and o_nz
4132    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4133    One way to choose d_nz and o_nz is to use the max nonzeros per local
4134    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4135    In this case, the values of d_nz,o_nz are:
4136 .vb
4137      proc0 : dnz = 2, o_nz = 2
4138      proc1 : dnz = 3, o_nz = 2
4139      proc2 : dnz = 1, o_nz = 4
4140 .ve
4141    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4142    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4143    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4144    34 values.
4145 
4146    When d_nnz, o_nnz parameters are specified, the storage is specified
4147    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4148    In the above case the values for d_nnz,o_nnz are:
4149 .vb
4150      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4151      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4152      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4153 .ve
4154    Here the space allocated is sum of all the above values i.e 34, and
4155    hence pre-allocation is perfect.
4156 
4157    Level: intermediate
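   A minimal calling sketch for the example above (this is the call made on proc0; the
   other ranks make the analogous call with their own local sizes and arrays; variable
   names are illustrative and error checking is omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
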
4158 
4159 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4160           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4161 @*/
4162 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4163 {
4164   PetscErrorCode ierr;
4165 
4166   PetscFunctionBegin;
4167   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4168   PetscValidType(B,1);
4169   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4170   PetscFunctionReturn(0);
4171 }
4172 
4173 /*@
4174      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4175          in standard CSR format.
4176 
4177    Collective
4178 
4179    Input Parameters:
4180 +  comm - MPI communicator
4181 .  m - number of local rows (Cannot be PETSC_DECIDE)
4182 .  n - This value should be the same as the local size used in creating the
4183        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4184        calculated if N is given) For square matrices n is almost always m.
4185 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4186 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4187 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4188 .   j - column indices
4189 -   a - matrix values
4190 
4191    Output Parameter:
4192 .   mat - the matrix
4193 
4194    Level: intermediate
4195 
4196    Notes:
4197        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4198      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4199      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4200 
4201        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4202 
4203        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4204 
4205        The format used for the sparse matrix input is equivalent to a
4206     row-major ordering, i.e. for the following matrix, the input data expected is
4207     as shown:
4208 
4209 $        1 0 0
4210 $        2 0 3     P0
4211 $       -------
4212 $        4 5 6     P1
4213 $
4214 $     Process0 [P0]: rows_owned=[0,1]
4215 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4216 $        j =  {0,0,2}  [size = 3]
4217 $        v =  {1,2,3}  [size = 3]
4218 $
4219 $     Process1 [P1]: rows_owned=[2]
4220 $        i =  {0,3}    [size = nrow+1  = 1+1]
4221 $        j =  {0,1,2}  [size = 3]
4222 $        v =  {4,5,6}  [size = 3]
4223 
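       As a minimal calling sketch using the P0 data above (run on the rank that owns rows 0
     and 1; every rank passes its own local arrays; names are illustrative and error checking
     is omitted):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     Mat         A;

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
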
4224 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4225           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4226 @*/
4227 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4228 {
4229   PetscErrorCode ierr;
4230 
4231   PetscFunctionBegin;
4232   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4233   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4234   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4235   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4236   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4237   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4238   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4239   PetscFunctionReturn(0);
4240 }
4241 
4242 /*@
4243      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4244          in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4245 
4246    Collective
4247 
4248    Input Parameters:
4249 +  mat - the matrix
4250 .  m - number of local rows (Cannot be PETSC_DECIDE)
4251 .  n - This value should be the same as the local size used in creating the
4252        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4253        calculated if N is given) For square matrices n is almost always m.
4254 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4255 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4256 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4257 .  J - column indices
4258 -  v - matrix values
4259 
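   Notes:
       The nonzero structure (Ii and J) and the local sizes must match those with which mat was
     originally created; only the numerical values in v may differ.

       A minimal sketch (assuming A, i, j and v come from an earlier MatCreateMPIAIJWithArrays()
     call; names are illustrative and error checking is omitted):

.vb
     PetscInt mloc,nloc,Mglob,Nglob;

     MatGetLocalSize(A,&mloc,&nloc);
     MatGetSize(A,&Mglob,&Nglob);
     v[0] = 10.0;                /* new numerical values, same sparsity pattern */
     MatUpdateMPIAIJWithArrays(A,mloc,nloc,Mglob,Nglob,i,j,v);
.ve
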
4260    Level: intermediate
4261 
4262 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4263           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4264 @*/
4265 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4266 {
4267   PetscErrorCode ierr;
4268   PetscInt       cstart,nnz,i,j;
4269   PetscInt       *ld;
4270   PetscBool      nooffprocentries;
4271   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4272   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4273   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4274   const PetscInt *Adi = Ad->i;
4275   PetscInt       ldi,Iii,md;
4276 
4277   PetscFunctionBegin;
4278   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4279   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4280   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4281   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4282 
4283   cstart = mat->cmap->rstart;
4284   if (!Aij->ld) {
4285     /* count number of entries below block diagonal */
4286     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4287     Aij->ld = ld;
4288     for (i=0; i<m; i++) {
4289       nnz  = Ii[i+1]- Ii[i];
4290       j     = 0;
4291       while (j < nnz && J[j] < cstart) {j++;}  /* check j < nnz before reading J[j] */
4292       J    += nnz;
4293       ld[i] = j;
4294     }
4295   } else {
4296     ld = Aij->ld;
4297   }
4298 
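  /* Copy the new values row by row: for row i, the first ld[i] entries (columns left of the
     diagonal block) go into the off-diagonal matrix B, the next md entries go into the
     diagonal matrix A, and the remaining entries of the row go back into B */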
4299   for (i=0; i<m; i++) {
4300     nnz  = Ii[i+1]- Ii[i];
4301     Iii  = Ii[i];
4302     ldi  = ld[i];
4303     md   = Adi[i+1]-Adi[i];
4304     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4305     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4306     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4307     ad  += md;
4308     ao  += nnz - md;
4309   }
4310   nooffprocentries      = mat->nooffprocentries;
4311   mat->nooffprocentries = PETSC_TRUE;
4312   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4313   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4314   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4315   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4316   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4317   mat->nooffprocentries = nooffprocentries;
4318   PetscFunctionReturn(0);
4319 }
4320 
4321 /*@C
4322    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4323    (the default parallel PETSc format).  For good matrix assembly performance
4324    the user should preallocate the matrix storage by setting the parameters
4325    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4326    performance can be increased by more than a factor of 50.
4327 
4328    Collective
4329 
4330    Input Parameters:
4331 +  comm - MPI communicator
4332 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4333            This value should be the same as the local size used in creating the
4334            y vector for the matrix-vector product y = Ax.
4335 .  n - This value should be the same as the local size used in creating the
4336        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4337        calculated if N is given) For square matrices n is almost always m.
4338 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4339 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4340 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4341            (same value is used for all local rows)
4342 .  d_nnz - array containing the number of nonzeros in the various rows of the
4343            DIAGONAL portion of the local submatrix (possibly different for each row)
4344            or NULL, if d_nz is used to specify the nonzero structure.
4345            The size of this array is equal to the number of local rows, i.e 'm'.
4346 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4347            submatrix (same value is used for all local rows).
4348 -  o_nnz - array containing the number of nonzeros in the various rows of the
4349            OFF-DIAGONAL portion of the local submatrix (possibly different for
4350            each row) or NULL, if o_nz is used to specify the nonzero
4351            structure. The size of this array is equal to the number
4352            of local rows, i.e 'm'.
4353 
4354    Output Parameter:
4355 .  A - the matrix
4356 
4357    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4358    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4359    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4360 
4361    Notes:
4362    If the *_nnz parameter is given then the *_nz parameter is ignored
4363 
4364    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4365    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4366    storage requirements for this matrix.
4367 
4368    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4369    processor then it must be used on all processors that share the object for
4370    that argument.
4371 
4372    The user MUST specify either the local or global matrix dimensions
4373    (possibly both).
4374 
4375    The parallel matrix is partitioned across processors such that the
4376    first m0 rows belong to process 0, the next m1 rows belong to
4377    process 1, the next m2 rows belong to process 2, etc., where
4378    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4379    values corresponding to an [m x N] submatrix.
4380 
4381    The columns are logically partitioned with the n0 columns belonging
4382    to 0th partition, the next n1 columns belonging to the next
4383    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4384 
4385    The DIAGONAL portion of the local submatrix on any given processor
4386    is the submatrix corresponding to the rows and columns m,n
4387    owned by the given processor, i.e. the diagonal matrix on
4388    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4389    etc. The remaining portion of the local submatrix [m x (N-n)]
4390    constitutes the OFF-DIAGONAL portion. The example below better
4391    illustrates this concept.
4392 
4393    For a square global matrix we define each processor's diagonal portion
4394    to be its local rows and the corresponding columns (a square submatrix);
4395    each processor's off-diagonal portion encompasses the remainder of the
4396    local matrix (a rectangular submatrix).
4397 
4398    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4399 
4400    When calling this routine with a single process communicator, a matrix of
4401    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4402    type of communicator, use the construction mechanism
4403 .vb
4404      MatCreate(...,&A);
4405      MatSetType(A,MATMPIAIJ);
4406      MatSetSizes(A, m,n,M,N);
4407      MatMPIAIJSetPreallocation(A,...);
4408 .ve
4411 
4412    By default, this format uses inodes (identical nodes) when possible.
4413    We search for consecutive rows with the same nonzero structure, thereby
4414    reusing matrix information to achieve increased efficiency.
4415 
4416    Options Database Keys:
4417 +  -mat_no_inode  - Do not use inodes
4418 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4419 
4420 
4421 
4422    Example usage:
4423 
4424    Consider the following 8x8 matrix with 34 non-zero values, that is
4425    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4426    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4427    as follows
4428 
4429 .vb
4430             1  2  0  |  0  3  0  |  0  4
4431     Proc0   0  5  6  |  7  0  0  |  8  0
4432             9  0 10  | 11  0  0  | 12  0
4433     -------------------------------------
4434            13  0 14  | 15 16 17  |  0  0
4435     Proc1   0 18  0  | 19 20 21  |  0  0
4436             0  0  0  | 22 23  0  | 24  0
4437     -------------------------------------
4438     Proc2  25 26 27  |  0  0 28  | 29  0
4439            30  0  0  | 31 32 33  |  0 34
4440 .ve
4441 
4442    This can be represented as a collection of submatrices as
4443 
4444 .vb
4445       A B C
4446       D E F
4447       G H I
4448 .ve
4449 
4450    Where the submatrices A,B,C are owned by proc0, D,E,F are
4451    owned by proc1, G,H,I are owned by proc2.
4452 
4453    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4454    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4455    The 'M','N' parameters are 8,8, and have the same values on all procs.
4456 
4457    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4458    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4459    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4460    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4461    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
4462    matrix, and [DF] as another SeqAIJ matrix.
4463 
4464    When d_nz, o_nz parameters are specified, d_nz storage elements are
4465    allocated for every row of the local diagonal submatrix, and o_nz
4466    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4467    One way to choose d_nz and o_nz is to use the max nonzeros per local
4468    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4469    In this case, the values of d_nz,o_nz are
4470 .vb
4471      proc0 : dnz = 2, o_nz = 2
4472      proc1 : dnz = 3, o_nz = 2
4473      proc2 : dnz = 1, o_nz = 4
4474 .ve
4475    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4476    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4477    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4478    34 values.
4479 
4480    When d_nnz, o_nnz parameters are specified, the storage is specified
4481    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4482    In the above case the values for d_nnz,o_nnz are
4483 .vb
4484      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4485      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4486      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4487 .ve
4488    Here the space allocated is the sum of all the above values, i.e. 34, and
4489    hence the preallocation is perfect.
4490 
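   A minimal single-call sketch for the example above (the call made on proc0; the other
   ranks make the analogous call with their own local sizes and arrays; names are
   illustrative and error checking is omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
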
4491    Level: intermediate
4492 
4493 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4494           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4495 @*/
4496 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4497 {
4498   PetscErrorCode ierr;
4499   PetscMPIInt    size;
4500 
4501   PetscFunctionBegin;
4502   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4503   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4504   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4505   if (size > 1) {
4506     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4507     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4508   } else {
4509     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4510     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4511   }
4512   PetscFunctionReturn(0);
4513 }
4514 
4515 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4516 {
4517   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4518   PetscBool      flg;
4519   PetscErrorCode ierr;
4520 
4521   PetscFunctionBegin;
4522   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4523   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4524   if (Ad)     *Ad     = a->A;
4525   if (Ao)     *Ao     = a->B;
4526   if (colmap) *colmap = a->garray;
4527   PetscFunctionReturn(0);
4528 }
4529 
4530 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4531 {
4532   PetscErrorCode ierr;
4533   PetscInt       m,N,i,rstart,nnz,Ii;
4534   PetscInt       *indx;
4535   PetscScalar    *values;
4536 
4537   PetscFunctionBegin;
4538   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4539   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4540     PetscInt       *dnz,*onz,sum,bs,cbs;
4541 
4542     if (n == PETSC_DECIDE) {
4543       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4544     }
4545     /* Check sum(n) = N */
4546     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4547     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4548 
4549     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4550     rstart -= m;
4551 
4552     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4553     for (i=0; i<m; i++) {
4554       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4555       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4556       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4557     }
4558 
4559     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4560     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4561     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4562     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4563     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4564     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4565     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4566     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4567   }
4568 
4569   /* numeric phase */
4570   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4571   for (i=0; i<m; i++) {
4572     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4573     Ii   = i + rstart;
4574     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4575     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4576   }
4577   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4578   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4579   PetscFunctionReturn(0);
4580 }
4581 
4582 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4583 {
4584   PetscErrorCode    ierr;
4585   PetscMPIInt       rank;
4586   PetscInt          m,N,i,rstart,nnz;
4587   size_t            len;
4588   const PetscInt    *indx;
4589   PetscViewer       out;
4590   char              *name;
4591   Mat               B;
4592   const PetscScalar *values;
4593 
4594   PetscFunctionBegin;
4595   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4596   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4597   /* Should this be the type of the diagonal block of A? */
4598   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4599   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4600   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4601   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4602   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4603   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4604   for (i=0; i<m; i++) {
4605     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4606     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4607     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4608   }
4609   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4610   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4611 
4612   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4613   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4614   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4615   sprintf(name,"%s.%d",outfile,rank);
4616   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4617   ierr = PetscFree(name);CHKERRQ(ierr);
4618   ierr = MatView(B,out);CHKERRQ(ierr);
4619   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4620   ierr = MatDestroy(&B);CHKERRQ(ierr);
4621   PetscFunctionReturn(0);
4622 }
4623 
4624 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4625 {
4626   PetscErrorCode      ierr;
4627   Mat_Merge_SeqsToMPI *merge;
4628   PetscContainer      container;
4629 
4630   PetscFunctionBegin;
4631   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4632   if (container) {
4633     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4634     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4635     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4636     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4637     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4638     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4639     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4640     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4641     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4642     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4643     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4644     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4645     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4646     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4647     ierr = PetscFree(merge);CHKERRQ(ierr);
4648     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4649   }
4650   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4651   PetscFunctionReturn(0);
4652 }
4653 
4654 #include <../src/mat/utils/freespace.h>
4655 #include <petscbt.h>
4656 
4657 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4658 {
4659   PetscErrorCode      ierr;
4660   MPI_Comm            comm;
4661   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4662   PetscMPIInt         size,rank,taga,*len_s;
4663   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4664   PetscInt            proc,m;
4665   PetscInt            **buf_ri,**buf_rj;
4666   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4667   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4668   MPI_Request         *s_waits,*r_waits;
4669   MPI_Status          *status;
4670   MatScalar           *aa=a->a;
4671   MatScalar           **abuf_r,*ba_i;
4672   Mat_Merge_SeqsToMPI *merge;
4673   PetscContainer      container;
4674 
4675   PetscFunctionBegin;
4676   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4677   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4678 
4679   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4680   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4681 
4682   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4683   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4684 
4685   bi     = merge->bi;
4686   bj     = merge->bj;
4687   buf_ri = merge->buf_ri;
4688   buf_rj = merge->buf_rj;
4689 
4690   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4691   owners = merge->rowmap->range;
4692   len_s  = merge->len_s;
4693 
4694   /* send and recv matrix values */
4695   /*-----------------------------*/
4696   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4697   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4698 
4699   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4700   for (proc=0,k=0; proc<size; proc++) {
4701     if (!len_s[proc]) continue;
4702     i    = owners[proc];
4703     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4704     k++;
4705   }
4706 
4707   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4708   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4709   ierr = PetscFree(status);CHKERRQ(ierr);
4710 
4711   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4712   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4713 
4714   /* insert mat values of mpimat */
4715   /*----------------------------*/
4716   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4717   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4718 
4719   for (k=0; k<merge->nrecv; k++) {
4720     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4721     nrows       = *(buf_ri_k[k]);
4722     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4723     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4724   }
4725 
4726   /* set values of ba */
4727   m = merge->rowmap->n;
4728   for (i=0; i<m; i++) {
4729     arow = owners[rank] + i;
4730     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4731     bnzi = bi[i+1] - bi[i];
4732     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4733 
4734     /* add local non-zero vals of this proc's seqmat into ba */
4735     anzi   = ai[arow+1] - ai[arow];
4736     aj     = a->j + ai[arow];
4737     aa     = a->a + ai[arow];
4738     nextaj = 0;
4739     for (j=0; nextaj<anzi; j++) {
4740       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4741         ba_i[j] += aa[nextaj++];
4742       }
4743     }
4744 
4745     /* add received vals into ba */
4746     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4747       /* i-th row */
4748       if (i == *nextrow[k]) {
4749         anzi   = *(nextai[k]+1) - *nextai[k];
4750         aj     = buf_rj[k] + *(nextai[k]);
4751         aa     = abuf_r[k] + *(nextai[k]);
4752         nextaj = 0;
4753         for (j=0; nextaj<anzi; j++) {
4754           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4755             ba_i[j] += aa[nextaj++];
4756           }
4757         }
4758         nextrow[k]++; nextai[k]++;
4759       }
4760     }
4761     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4762   }
4763   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4764   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4765 
4766   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4767   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4768   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4769   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4770   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4771   PetscFunctionReturn(0);
4772 }
4773 
4774 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4775 {
4776   PetscErrorCode      ierr;
4777   Mat                 B_mpi;
4778   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4779   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4780   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4781   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4782   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4783   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4784   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4785   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4786   MPI_Status          *status;
4787   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4788   PetscBT             lnkbt;
4789   Mat_Merge_SeqsToMPI *merge;
4790   PetscContainer      container;
4791 
4792   PetscFunctionBegin;
4793   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4794 
4795   /* make sure it is a PETSc comm */
4796   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4798   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4799 
4800   ierr = PetscNew(&merge);CHKERRQ(ierr);
4801   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4802 
4803   /* determine row ownership */
4804   /*---------------------------------------------------------*/
4805   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4806   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4807   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4808   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4809   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4810   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4811   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4812 
4813   m      = merge->rowmap->n;
4814   owners = merge->rowmap->range;
4815 
4816   /* determine the number of messages to send, their lengths */
4817   /*---------------------------------------------------------*/
4818   len_s = merge->len_s;
4819 
4820   len          = 0; /* length of buf_si[] */
4821   merge->nsend = 0;
4822   for (proc=0; proc<size; proc++) {
4823     len_si[proc] = 0;
4824     if (proc == rank) {
4825       len_s[proc] = 0;
4826     } else {
4827       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4828       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4829     }
4830     if (len_s[proc]) {
4831       merge->nsend++;
4832       nrows = 0;
4833       for (i=owners[proc]; i<owners[proc+1]; i++) {
4834         if (ai[i+1] > ai[i]) nrows++;
4835       }
4836       len_si[proc] = 2*(nrows+1);
4837       len         += len_si[proc];
4838     }
4839   }
4840 
4841   /* determine the number and length of messages to receive for ij-structure */
4842   /*-------------------------------------------------------------------------*/
4843   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4844   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4845 
4846   /* post the Irecv of j-structure */
4847   /*-------------------------------*/
4848   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4849   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4850 
4851   /* post the Isend of j-structure */
4852   /*--------------------------------*/
4853   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4854 
4855   for (proc=0, k=0; proc<size; proc++) {
4856     if (!len_s[proc]) continue;
4857     i    = owners[proc];
4858     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4859     k++;
4860   }
4861 
4862   /* receives and sends of j-structure are complete */
4863   /*------------------------------------------------*/
4864   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4865   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4866 
4867   /* send and recv i-structure */
4868   /*---------------------------*/
4869   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4870   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4871 
4872   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4873   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4874   for (proc=0,k=0; proc<size; proc++) {
4875     if (!len_s[proc]) continue;
4876     /* form outgoing message for i-structure:
4877          buf_si[0]:                 nrows to be sent
4878                [1:nrows]:           row index (global)
4879                [nrows+1:2*nrows+1]: i-structure index
4880     */
4881     /*-------------------------------------------*/
4882     nrows       = len_si[proc]/2 - 1;
4883     buf_si_i    = buf_si + nrows+1;
4884     buf_si[0]   = nrows;
4885     buf_si_i[0] = 0;
4886     nrows       = 0;
4887     for (i=owners[proc]; i<owners[proc+1]; i++) {
4888       anzi = ai[i+1] - ai[i];
4889       if (anzi) {
4890         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4891         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4892         nrows++;
4893       }
4894     }
4895     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4896     k++;
4897     buf_si += len_si[proc];
4898   }
4899 
4900   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4901   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4902 
4903   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4904   for (i=0; i<merge->nrecv; i++) {
4905     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4906   }
4907 
4908   ierr = PetscFree(len_si);CHKERRQ(ierr);
4909   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4910   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4911   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4912   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4913   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4914   ierr = PetscFree(status);CHKERRQ(ierr);
4915 
4916   /* compute a local seq matrix in each processor */
4917   /*----------------------------------------------*/
4918   /* allocate bi array and free space for accumulating nonzero column info */
4919   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4920   bi[0] = 0;
4921 
4922   /* create and initialize a linked list */
4923   nlnk = N+1;
4924   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4925 
4926   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4927   len  = ai[owners[rank+1]] - ai[owners[rank]];
4928   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4929 
4930   current_space = free_space;
4931 
4932   /* determine symbolic info for each local row */
4933   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4934 
4935   for (k=0; k<merge->nrecv; k++) {
4936     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4937     nrows       = *buf_ri_k[k];
4938     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4939     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4940   }
4941 
4942   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4943   len  = 0;
4944   for (i=0; i<m; i++) {
4945     bnzi = 0;
4946     /* add local non-zero cols of this proc's seqmat into lnk */
4947     arow  = owners[rank] + i;
4948     anzi  = ai[arow+1] - ai[arow];
4949     aj    = a->j + ai[arow];
4950     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4951     bnzi += nlnk;
4952     /* add received col data into lnk */
4953     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4954       if (i == *nextrow[k]) { /* i-th row */
4955         anzi  = *(nextai[k]+1) - *nextai[k];
4956         aj    = buf_rj[k] + *nextai[k];
4957         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4958         bnzi += nlnk;
4959         nextrow[k]++; nextai[k]++;
4960       }
4961     }
4962     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4963 
4964     /* if free space is not available, make more free space */
4965     if (current_space->local_remaining<bnzi) {
4966       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4967       nspacedouble++;
4968     }
4969     /* copy data into free space, then initialize lnk */
4970     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4971     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4972 
4973     current_space->array           += bnzi;
4974     current_space->local_used      += bnzi;
4975     current_space->local_remaining -= bnzi;
4976 
4977     bi[i+1] = bi[i] + bnzi;
4978   }
4979 
4980   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4981 
4982   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4983   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4984   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4985 
4986   /* create symbolic parallel matrix B_mpi */
4987   /*---------------------------------------*/
4988   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4989   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4990   if (n==PETSC_DECIDE) {
4991     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4992   } else {
4993     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4994   }
4995   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4996   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4997   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4998   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4999   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5000 
5001   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5002   B_mpi->assembled    = PETSC_FALSE;
5003   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5004   merge->bi           = bi;
5005   merge->bj           = bj;
5006   merge->buf_ri       = buf_ri;
5007   merge->buf_rj       = buf_rj;
5008   merge->coi          = NULL;
5009   merge->coj          = NULL;
5010   merge->owners_co    = NULL;
5011 
5012   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5013 
5014   /* attach the supporting struct to B_mpi for reuse */
5015   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5016   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5017   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5018   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5019   *mpimat = B_mpi;
5020 
5021   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5022   PetscFunctionReturn(0);
5023 }
5024 
5025 /*@C
5026       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5027                  matrices from each processor
5028 
5029     Collective
5030 
5031    Input Parameters:
5032 +    comm - the communicator the parallel matrix will live on
5033 .    seqmat - the input sequential matrix (one per process)
5034 .    m - number of local rows (or PETSC_DECIDE)
5035 .    n - number of local columns (or PETSC_DECIDE)
5036 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5037 
5038    Output Parameter:
5039 .    mpimat - the parallel matrix generated
5040 
5041     Level: advanced
5042 
5043    Notes:
5044      The dimensions of the sequential matrix in each processor MUST be the same.
5045      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5046      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5047 @*/
5048 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5049 {
5050   PetscErrorCode ierr;
5051   PetscMPIInt    size;
5052 
5053   PetscFunctionBegin;
5054   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5055   if (size == 1) {
5056     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5057     if (scall == MAT_INITIAL_MATRIX) {
5058       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5059     } else {
5060       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5061     }
5062     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5063     PetscFunctionReturn(0);
5064   }
5065   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5066   if (scall == MAT_INITIAL_MATRIX) {
5067     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5068   }
5069   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5070   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5071   PetscFunctionReturn(0);
5072 }
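/*
   A minimal usage sketch for MatCreateMPIAIJSumSeqAIJ() (illustrative only; error checking and the
   application-specific assembly of seqmat are omitted, and M, N, and maxnz are assumed to be defined):

     Mat seqmat,mpimat;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,maxnz,NULL,&seqmat);CHKERRQ(ierr);
     ... each process inserts its contribution with MatSetValues() and assembles seqmat ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... repeated sums with the same nonzero pattern can pass MAT_REUSE_MATRIX ...
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/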
5073 
5074 /*@
5075      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5076           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5077           with MatGetSize()
5078 
5079     Not Collective
5080 
5081    Input Parameters:
5082 +    A - the matrix
5083 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5084 
5085    Output Parameter:
5086 .    A_loc - the local sequential matrix generated
5087 
5088     Level: developer
5089 
5090 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5091 
5092 @*/
5093 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5094 {
5095   PetscErrorCode ierr;
5096   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5097   Mat_SeqAIJ     *mat,*a,*b;
5098   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5099   MatScalar      *aa,*ba,*cam;
5100   PetscScalar    *ca;
5101   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5102   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5103   PetscBool      match;
5104   MPI_Comm       comm;
5105   PetscMPIInt    size;
5106 
5107   PetscFunctionBegin;
5108   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5109   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5110   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5111   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5112   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5113 
5114   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5115   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5116   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5117   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5118   aa = a->a; ba = b->a;
5119   if (scall == MAT_INITIAL_MATRIX) {
5120     if (size == 1) {
5121       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5122       PetscFunctionReturn(0);
5123     }
5124 
5125     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5126     ci[0] = 0;
5127     for (i=0; i<am; i++) {
5128       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5129     }
5130     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5131     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5132     k    = 0;
5133     for (i=0; i<am; i++) {
5134       ncols_o = bi[i+1] - bi[i];
5135       ncols_d = ai[i+1] - ai[i];
5136       /* off-diagonal portion of A */
5137       for (jo=0; jo<ncols_o; jo++) {
5138         col = cmap[*bj];
5139         if (col >= cstart) break;
5140         cj[k]   = col; bj++;
5141         ca[k++] = *ba++;
5142       }
5143       /* diagonal portion of A */
5144       for (j=0; j<ncols_d; j++) {
5145         cj[k]   = cstart + *aj++;
5146         ca[k++] = *aa++;
5147       }
5148       /* off-diagonal portion of A */
5149       for (j=jo; j<ncols_o; j++) {
5150         cj[k]   = cmap[*bj++];
5151         ca[k++] = *ba++;
5152       }
5153     }
5154     /* put together the new matrix */
5155     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5156     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5157     /* Since these are PETSc arrays, change flags to free them as necessary. */
5158     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5159     mat->free_a  = PETSC_TRUE;
5160     mat->free_ij = PETSC_TRUE;
5161     mat->nonew   = 0;
5162   } else if (scall == MAT_REUSE_MATRIX) {
5163     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5164     ci = mat->i; cj = mat->j; cam = mat->a;
5165     for (i=0; i<am; i++) {
5166       /* off-diagonal portion of A */
5167       ncols_o = bi[i+1] - bi[i];
5168       for (jo=0; jo<ncols_o; jo++) {
5169         col = cmap[*bj];
5170         if (col >= cstart) break;
5171         *cam++ = *ba++; bj++;
5172       }
5173       /* diagonal portion of A */
5174       ncols_d = ai[i+1] - ai[i];
5175       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5176       /* off-diagonal portion of A */
5177       for (j=jo; j<ncols_o; j++) {
5178         *cam++ = *ba++; bj++;
5179       }
5180     }
5181   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5182   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5183   PetscFunctionReturn(0);
5184 }
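/*
   A minimal usage sketch for MatMPIAIJGetLocalMat() (illustrative; error checking omitted).
   The first call creates A_loc; subsequent calls with MAT_REUSE_MATRIX refresh its values
   after A has changed:

     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc as an ordinary sequential AIJ matrix ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/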
5185 
5186 /*@C
5187      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5188 
5189     Not Collective
5190 
5191    Input Parameters:
5192 +    A - the matrix
5193 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5194 -    row, col - index sets of rows and columns to extract (or NULL)
5195 
5196    Output Parameter:
5197 .    A_loc - the local sequential matrix generated
5198 
5199     Level: developer
5200 
5201 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5202 
5203 @*/
5204 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5205 {
5206   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5207   PetscErrorCode ierr;
5208   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5209   IS             isrowa,iscola;
5210   Mat            *aloc;
5211   PetscBool      match;
5212 
5213   PetscFunctionBegin;
5214   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5215   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5216   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5217   if (!row) {
5218     start = A->rmap->rstart; end = A->rmap->rend;
5219     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5220   } else {
5221     isrowa = *row;
5222   }
5223   if (!col) {
5224     start = A->cmap->rstart;
5225     cmap  = a->garray;
5226     nzA   = a->A->cmap->n;
5227     nzB   = a->B->cmap->n;
5228     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5229     ncols = 0;
5230     for (i=0; i<nzB; i++) {
5231       if (cmap[i] < start) idx[ncols++] = cmap[i];
5232       else break;
5233     }
5234     imark = i;
5235     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5236     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5237     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5238   } else {
5239     iscola = *col;
5240   }
5241   if (scall != MAT_INITIAL_MATRIX) {
5242     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5243     aloc[0] = *A_loc;
5244   }
5245   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5246   if (!col) { /* attach global id of condensed columns */
5247     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5248   }
5249   *A_loc = aloc[0];
5250   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5251   if (!row) {
5252     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5253   }
5254   if (!col) {
5255     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5256   }
5257   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5258   PetscFunctionReturn(0);
5259 }
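/*
   A minimal usage sketch for MatMPIAIJGetLocalMatCondensed() (illustrative; error checking omitted).
   Passing NULL for row and col extracts all local rows and only the nonzero columns:

     Mat A_loc;

     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... work with the condensed local matrix ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/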
5260 
5261 /*
5262  * Destroy a mat that may be composed with PetscSF communication objects.
5263  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5264  * */
5265 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5266 {
5267   PetscSF          sf,osf;
5268   IS               map;
5269   PetscErrorCode   ierr;
5270 
5271   PetscFunctionBegin;
5272   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5273   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5274   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5275   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5276   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5277   ierr = ISDestroy(&map);CHKERRQ(ierr);
5278   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5279   PetscFunctionReturn(0);
5280 }
5281 
5282 /*
5283  * Create a sequential AIJ matrix based on row indices: once a row is matched, all of its columns are extracted.
5284  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing depends
5285  * on the global size.
5286  * */
5287 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5288 {
5289   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5290   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5291   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol;
5292   PetscSFNode              *iremote,*oiremote;
5293   const PetscInt           *lrowindices;
5294   PetscErrorCode           ierr;
5295   PetscSF                  sf,osf;
5296   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5297   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5298   MPI_Comm                 comm;
5299   ISLocalToGlobalMapping   mapping;
5300 
5301   PetscFunctionBegin;
5302   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5303   /* plocalsize is the number of roots
5304    * nrows is the number of leaves
5305    * */
5306   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5307   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5308   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5309   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5310   for (i=0;i<nrows;i++) {
5311     /* Find a remote index and an owner for a row
5312      * The row could be local or remote
5313      * */
5314     owner = 0;
5315     lidx  = 0;
5316     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5317     iremote[i].index = lidx;
5318     iremote[i].rank  = owner;
5319   }
5320   /* Create SF to communicate how many nonzero columns each row has */
5321   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5322   /* SF will figure out the number of nonzero columns for each row, and their
5323    * offsets
5324    * */
5325   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5326   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5327   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5328 
5329   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5330   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5331   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5332   roffsets[0] = 0;
5333   roffsets[1] = 0;
5334   for (i=0;i<plocalsize;i++) {
5335     /* diag */
5336     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5337     /* off diag */
5338     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5339     /* compute offsets so that we know the relative location of each row */
5340     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5341     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5342   }
5343   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5344   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5345   /* 'r' means root, and 'l' means leaf */
5346   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5347   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5348   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5349   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5350   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5351   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5352   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5353   dntotalcols = 0;
5354   ontotalcols = 0;
5355   ncol = 0;
5356   for (i=0;i<nrows;i++) {
5357     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5358     ncol = PetscMax(pnnz[i],ncol);
5359     /* diag */
5360     dntotalcols += nlcols[i*2+0];
5361     /* off diag */
5362     ontotalcols += nlcols[i*2+1];
5363   }
5364   /* We do not need to figure out the exact number of columns
5365    * since all the calculations will be done by going through the raw data
5366    * */
5367   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5368   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5369   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5370   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5371   /* diag */
5372   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5373   /* off diag */
5374   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5375   /* diag */
5376   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5377   /* off diag */
5378   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5379   dntotalcols = 0;
5380   ontotalcols = 0;
5381   ntotalcols  = 0;
5382   for (i=0;i<nrows;i++) {
5383     owner = 0;
5384     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5385     /* Set iremote for diag matrix */
5386     for (j=0;j<nlcols[i*2+0];j++) {
5387       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5388       iremote[dntotalcols].rank    = owner;
5389       /* P_oth is SeqAIJ, so ilocal needs to point to the beginning of the memory */
5390       ilocal[dntotalcols++]        = ntotalcols++;
5391     }
5392     /* off diag */
5393     for (j=0;j<nlcols[i*2+1];j++) {
5394       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5395       oiremote[ontotalcols].rank    = owner;
5396       oilocal[ontotalcols++]        = ntotalcols++;
5397     }
5398   }
5399   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5400   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5401   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5402   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5403   /* P serves as roots and P_oth serves as leaves
5404    * Diag matrix
5405    * */
5406   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5407   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5408   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5409 
5410   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5411   /* Off diag */
5412   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5413   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5414   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5415   /* We operate on the matrix internal data to save memory */
5416   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5417   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5418   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5419   /* Convert to global indices for diag matrix */
5420   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5421   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5422   /* We want P_oth to store global indices */
5423   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5424   /* Use memory scalable approach */
5425   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5426   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5427   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5428   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5429   /* Convert back to local indices */
5430   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5431   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5432   nout = 0;
5433   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5434   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5435   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5436   /* Exchange values */
5437   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5438   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5439   /* Stop PETSc from shrinking memory */
5440   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5441   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5442   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5443   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5444   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5445   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5446   /* The new MatDestroy() takes care of the PetscSF objects as well */
5447   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5448   PetscFunctionReturn(0);
5449 }
5450 
5451 /*
5452  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A
5453  * This supports MPIAIJ and MAIJ
5454  * */
5455 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5456 {
5457   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5458   Mat_SeqAIJ            *p_oth;
5459   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5460   IS                    rows,map;
5461   PetscHMapI            hamp;
5462   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5463   MPI_Comm              comm;
5464   PetscSF               sf,osf;
5465   PetscBool             has;
5466   PetscErrorCode        ierr;
5467 
5468   PetscFunctionBegin;
5469   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5470   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5471   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5472    *  and then create a submatrix (that often is an overlapping matrix)
5473    * */
5474   if (reuse==MAT_INITIAL_MATRIX) {
5475     /* Use a hash table to figure out unique keys */
5476     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5477     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5478     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5479     count = 0;
5480     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5481     for (i=0;i<a->B->cmap->n;i++) {
5482       key  = a->garray[i]/dof;
5483       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5484       if (!has) {
5485         mapping[i] = count;
5486         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5487       } else {
5488         /* Current 'i' maps to the same key as the previous one */
5489         mapping[i] = count-1;
5490       }
5491     }
5492     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5493     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5494     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5495     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5496     off = 0;
5497     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5498     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5499     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5500     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5501     /* In case the matrix was already created but the user wants to recreate it */
5502     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5503     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5504     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5505     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5506   } else if (reuse==MAT_REUSE_MATRIX) {
5507     /* If the matrix was already created, we simply update values using the SF objects
5508      * that were attached to the matrix earlier.
5509      *  */
5510     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5511     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5512     if (!sf || !osf) {
5513       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5514     }
5515     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5516     /* Update values in place */
5517     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5518     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5519     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5520     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5521   } else {
5522     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5523   }
5524   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5525   PetscFunctionReturn(0);
5526 }
5527 
5528 /*@C
5529     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5530 
5531     Collective on Mat
5532 
5533    Input Parameters:
5534 +    A,B - the matrices in mpiaij format
5535 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5536 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5537 
5538    Output Parameter:
5539 +    rowb, colb - index sets of rows and columns of B to extract
5540 -    B_seq - the sequential matrix generated
5541 
5542     Level: developer
5543 
5544 @*/
5545 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5546 {
5547   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5548   PetscErrorCode ierr;
5549   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5550   IS             isrowb,iscolb;
5551   Mat            *bseq=NULL;
5552 
5553   PetscFunctionBegin;
5554   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5555     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5556   }
5557   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5558 
5559   if (scall == MAT_INITIAL_MATRIX) {
5560     start = A->cmap->rstart;
5561     cmap  = a->garray;
5562     nzA   = a->A->cmap->n;
5563     nzB   = a->B->cmap->n;
5564     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5565     ncols = 0;
5566     for (i=0; i<nzB; i++) {  /* row < local row index */
5567       if (cmap[i] < start) idx[ncols++] = cmap[i];
5568       else break;
5569     }
5570     imark = i;
5571     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5572     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5573     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5574     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5575   } else {
5576     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5577     isrowb  = *rowb; iscolb = *colb;
5578     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5579     bseq[0] = *B_seq;
5580   }
5581   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5582   *B_seq = bseq[0];
5583   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5584   if (!rowb) {
5585     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5586   } else {
5587     *rowb = isrowb;
5588   }
5589   if (!colb) {
5590     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5591   } else {
5592     *colb = iscolb;
5593   }
5594   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5595   PetscFunctionReturn(0);
5596 }
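/*
   A minimal usage sketch for MatGetBrowsOfAcols() (illustrative; error checking omitted).
   The index sets created on the first call are returned through rowb and colb so they can
   be passed back in when reusing the result:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... the values of B change, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
*/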
5597 
5598 /*
5599     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5600     of the OFF-DIAGONAL portion of local A
5601 
5602     Collective on Mat
5603 
5604    Input Parameters:
5605 +    A,B - the matrices in mpiaij format
5606 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5607 
5608    Output Parameter:
5609 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5610 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5611 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5612 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5613 
5614     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5615      for this matrix. This is not desirable.
5616 
5617     Level: developer
5618 
5619 */
5620 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5621 {
5622   PetscErrorCode         ierr;
5623   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5624   Mat_SeqAIJ             *b_oth;
5625   VecScatter             ctx;
5626   MPI_Comm               comm;
5627   const PetscMPIInt      *rprocs,*sprocs;
5628   const PetscInt         *srow,*rstarts,*sstarts;
5629   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5630   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5631   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5632   MPI_Request            *rwaits = NULL,*swaits = NULL;
5633   MPI_Status             rstatus;
5634   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5635 
5636   PetscFunctionBegin;
5637   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5638   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5639 
5640   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5641     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5642   }
5643   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5644   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5645 
5646   if (size == 1) {
5647     startsj_s = NULL;
5648     bufa_ptr  = NULL;
5649     *B_oth    = NULL;
5650     PetscFunctionReturn(0);
5651   }
5652 
5653   ctx = a->Mvctx;
5654   tag = ((PetscObject)ctx)->tag;
5655 
5656   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5657   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5658   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5659   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5660   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5661   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5662   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5663 
5664   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5665   if (scall == MAT_INITIAL_MATRIX) {
5666     /* i-array */
5667     /*---------*/
5668     /*  post receives */
5669     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5670     for (i=0; i<nrecvs; i++) {
5671       rowlen = rvalues + rstarts[i]*rbs;
5672       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5673       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5674     }
5675 
5676     /* pack the outgoing message */
5677     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5678 
5679     sstartsj[0] = 0;
5680     rstartsj[0] = 0;
5681     len         = 0; /* total length of j or a array to be sent */
5682     if (nsends) {
5683       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5684       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5685     }
5686     for (i=0; i<nsends; i++) {
5687       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5688       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5689       for (j=0; j<nrows; j++) {
5690         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5691         for (l=0; l<sbs; l++) {
5692           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5693 
5694           rowlen[j*sbs+l] = ncols;
5695 
5696           len += ncols;
5697           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5698         }
5699         k++;
5700       }
5701       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5702 
5703       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5704     }
5705     /* recvs and sends of i-array are completed */
5706     i = nrecvs;
5707     while (i--) {
5708       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5709     }
5710     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5711     ierr = PetscFree(svalues);CHKERRQ(ierr);
5712 
5713     /* allocate buffers for sending j and a arrays */
5714     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5715     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5716 
5717     /* create i-array of B_oth */
5718     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5719 
5720     b_othi[0] = 0;
5721     len       = 0; /* total length of j or a array to be received */
5722     k         = 0;
5723     for (i=0; i<nrecvs; i++) {
5724       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5725       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5726       for (j=0; j<nrows; j++) {
5727         b_othi[k+1] = b_othi[k] + rowlen[j];
5728         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5729         k++;
5730       }
5731       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5732     }
5733     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5734 
5735     /* allocate space for j and a arrays of B_oth */
5736     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5737     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5738 
5739     /* j-array */
5740     /*---------*/
5741     /*  post receives of j-array */
5742     for (i=0; i<nrecvs; i++) {
5743       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5744       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5745     }
5746 
5747     /* pack the outgoing message j-array */
5748     if (nsends) k = sstarts[0];
5749     for (i=0; i<nsends; i++) {
5750       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5751       bufJ  = bufj+sstartsj[i];
5752       for (j=0; j<nrows; j++) {
5753         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5754         for (ll=0; ll<sbs; ll++) {
5755           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5756           for (l=0; l<ncols; l++) {
5757             *bufJ++ = cols[l];
5758           }
5759           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5760         }
5761       }
5762       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5763     }
5764 
5765     /* recvs and sends of j-array are completed */
5766     i = nrecvs;
5767     while (i--) {
5768       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5769     }
5770     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5771   } else if (scall == MAT_REUSE_MATRIX) {
5772     sstartsj = *startsj_s;
5773     rstartsj = *startsj_r;
5774     bufa     = *bufa_ptr;
5775     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5776     b_otha   = b_oth->a;
5777   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5778 
5779   /* a-array */
5780   /*---------*/
5781   /*  post receives of a-array */
5782   for (i=0; i<nrecvs; i++) {
5783     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5784     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5785   }
5786 
5787   /* pack the outgoing message a-array */
5788   if (nsends) k = sstarts[0];
5789   for (i=0; i<nsends; i++) {
5790     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5791     bufA  = bufa+sstartsj[i];
5792     for (j=0; j<nrows; j++) {
5793       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5794       for (ll=0; ll<sbs; ll++) {
5795         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5796         for (l=0; l<ncols; l++) {
5797           *bufA++ = vals[l];
5798         }
5799         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5800       }
5801     }
5802     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5803   }
5804   /* recvs and sends of a-array are completed */
5805   i = nrecvs;
5806   while (i--) {
5807     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5808   }
5809   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5810   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5811 
5812   if (scall == MAT_INITIAL_MATRIX) {
5813     /* put together the new matrix */
5814     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5815 
5816     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5817     /* Since these are PETSc arrays, change flags to free them as necessary. */
5818     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5819     b_oth->free_a  = PETSC_TRUE;
5820     b_oth->free_ij = PETSC_TRUE;
5821     b_oth->nonew   = 0;
5822 
5823     ierr = PetscFree(bufj);CHKERRQ(ierr);
5824     if (!startsj_s || !bufa_ptr) {
5825       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5826       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5827     } else {
5828       *startsj_s = sstartsj;
5829       *startsj_r = rstartsj;
5830       *bufa_ptr  = bufa;
5831     }
5832   }
5833 
5834   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5835   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5836   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5837   PetscFunctionReturn(0);
5838 }
5839 
5840 /*@C
5841   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5842 
5843   Not Collective
5844 
5845   Input Parameter:
5846 . A - The matrix in mpiaij format
5847 
5848   Output Parameters:
5849 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5850 . colmap - A map from global column index to local index into lvec
5851 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5852 
5853   Level: developer
5854 
5855 @*/
5856 #if defined(PETSC_USE_CTABLE)
5857 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5858 #else
5859 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5860 #endif
5861 {
5862   Mat_MPIAIJ *a;
5863 
5864   PetscFunctionBegin;
5865   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5866   PetscValidPointer(lvec, 2);
5867   PetscValidPointer(colmap, 3);
5868   PetscValidPointer(multScatter, 4);
5869   a = (Mat_MPIAIJ*) A->data;
5870   if (lvec) *lvec = a->lvec;
5871   if (colmap) *colmap = a->colmap;
5872   if (multScatter) *multScatter = a->Mvctx;
5873   PetscFunctionReturn(0);
5874 }
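/*
   A minimal usage sketch for MatGetCommunicationStructs() (illustrative; error checking omitted).
   The returned objects are internal to the matrix and must not be destroyed by the caller:

     Vec        lvec;
     VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
*/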
5875 
5876 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5877 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5878 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5879 #if defined(PETSC_HAVE_MKL_SPARSE)
5880 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5881 #endif
5882 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5883 #if defined(PETSC_HAVE_ELEMENTAL)
5884 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5885 #endif
5886 #if defined(PETSC_HAVE_HYPRE)
5887 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5888 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5889 #endif
5890 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5891 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5892 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5893 
5894 /*
5895     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5896 
5897                n                       p                          p
5898         (              )       (              )         (                  )
5899       m (      A       )  *  n (       B      )   =   m (         C        )
5900         (              )       (              )         (                  )
5901 
5902 */
5903 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5904 {
5905   PetscErrorCode ierr;
5906   Mat            At,Bt,Ct;
5907 
5908   PetscFunctionBegin;
5909   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5910   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5911   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5912   ierr = MatDestroy(&At);CHKERRQ(ierr);
5913   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5914   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5915   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5916   PetscFunctionReturn(0);
5917 }
5918 
5919 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5920 {
5921   PetscErrorCode ierr;
5922   PetscInt       m=A->rmap->n,n=B->cmap->n;
5923   Mat            Cmat;
5924 
5925   PetscFunctionBegin;
5926   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5927   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5928   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5929   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5930   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5931   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5932   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5933   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5934 
5935   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5936 
5937   *C = Cmat;
5938   PetscFunctionReturn(0);
5939 }
5940 
5941 /* ----------------------------------------------------------------*/
5942 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5943 {
5944   PetscErrorCode ierr;
5945 
5946   PetscFunctionBegin;
5947   if (scall == MAT_INITIAL_MATRIX) {
5948     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5949     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5950     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5951   }
5952   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5953   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5954   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5955   PetscFunctionReturn(0);
5956 }
5957 
5958 /*MC
5959    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5960 
5961    Options Database Keys:
5962 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5963 
5964    Level: beginner
5965 
5966    Notes:
5967     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5968     in this case the values associated with the rows and columns one passes in are set to zero
5969     in the matrix.
5970 
5971     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5972     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
5973 
5974 .seealso: MatCreateAIJ()
5975 M*/
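/*
   A minimal sketch of the NULL-values usage described in the Notes above (illustrative;
   assumes A is a preallocated MATMPIAIJ matrix and row/col are valid global indices).
   Passing NULL for the values sets the targeted entries to zero:

     ierr = MatSetValues(A,1,&row,1,&col,NULL,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/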
5976 
5977 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5978 {
5979   Mat_MPIAIJ     *b;
5980   PetscErrorCode ierr;
5981   PetscMPIInt    size;
5982 
5983   PetscFunctionBegin;
5984   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5985 
5986   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5987   B->data       = (void*)b;
5988   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5989   B->assembled  = PETSC_FALSE;
5990   B->insertmode = NOT_SET_VALUES;
5991   b->size       = size;
5992 
5993   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5994 
5995   /* build cache for off array entries formed */
5996   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5997 
5998   b->donotstash  = PETSC_FALSE;
5999   b->colmap      = 0;
6000   b->garray      = 0;
6001   b->roworiented = PETSC_TRUE;
6002 
6003   /* stuff used for matrix vector multiply */
6004   b->lvec  = NULL;
6005   b->Mvctx = NULL;
6006 
6007   /* stuff for MatGetRow() */
6008   b->rowindices   = 0;
6009   b->rowvalues    = 0;
6010   b->getrowactive = PETSC_FALSE;
6011 
6012   /* flexible pointer used in CUSP/CUSPARSE classes */
6013   b->spptr = NULL;
6014 
6015   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6016   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6018   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6019   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6021   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6022   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6025 #if defined(PETSC_HAVE_MKL_SPARSE)
6026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6027 #endif
6028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6030 #if defined(PETSC_HAVE_ELEMENTAL)
6031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6032 #endif
6033 #if defined(PETSC_HAVE_HYPRE)
6034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6035 #endif
6036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6041 #if defined(PETSC_HAVE_HYPRE)
6042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6043 #endif
6044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6045   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6046   PetscFunctionReturn(0);
6047 }
6048 
6049 /*@C
6050      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6051          and "off-diagonal" part of the matrix in CSR format.
6052 
6053    Collective
6054 
6055    Input Parameters:
6056 +  comm - MPI communicator
6057 .  m - number of local rows (Cannot be PETSC_DECIDE)
6058 .  n - This value should be the same as the local size used in creating the
6059        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6060        calculated if N is given). For square matrices n is almost always m.
6061 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6062 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6063 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6064 .   j - column indices
6065 .   a - matrix values
6066 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6067 .   oj - column indices
6068 -   oa - matrix values
6069 
6070    Output Parameter:
6071 .   mat - the matrix
6072 
6073    Level: advanced
6074 
6075    Notes:
6076        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6077        must free the arrays once the matrix has been destroyed and not before.
6078 
6079        The i and j indices are 0 based
6080 
6081        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6082 
6083        This sets local rows and cannot be used to set off-processor values.
6084 
6085        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6086        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6087        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6088        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6089        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6090        communication if it is known that only local entries will be set.
6091 
6092 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6093           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6094 @*/
6095 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6096 {
6097   PetscErrorCode ierr;
6098   Mat_MPIAIJ     *maij;
6099 
6100   PetscFunctionBegin;
6101   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6102   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6103   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6104   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6105   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6106   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6107   maij = (Mat_MPIAIJ*) (*mat)->data;
6108 
6109   (*mat)->preallocated = PETSC_TRUE;
6110 
6111   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6112   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6113 
6114   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6115   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6116 
6117   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6118   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6119   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6120   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6121 
6122   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6123   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6124   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6125   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6126   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6127   PetscFunctionReturn(0);
6128 }
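/*
   A minimal sketch of MatCreateMPIAIJWithSplitArrays() (illustrative only; see the Notes above for
   why MatSetValues() is usually preferable). Each process contributes two rows whose "diagonal"
   block is a 2x2 identity and whose "off-diagonal" block is empty; the arrays must stay alive
   until the matrix is destroyed:

     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
     PetscScalar a[]  = {1.0,1.0};
     PetscInt    oi[] = {0,0,0},  oj[] = {0};
     PetscScalar oa[] = {0.0};
     Mat         mat;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/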
6129 
6130 /*
6131     Special version for direct calls from Fortran
6132 */
6133 #include <petsc/private/fortranimpl.h>
6134 
6135 /* Change these macros so they can be used in a void function */
6136 #undef CHKERRQ
6137 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6138 #undef SETERRQ2
6139 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6140 #undef SETERRQ3
6141 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6142 #undef SETERRQ
6143 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6144 
6145 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6146 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6147 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6148 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6149 #else
6150 #endif
6151 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6152 {
6153   Mat            mat  = *mmat;
6154   PetscInt       m    = *mm, n = *mn;
6155   InsertMode     addv = *maddv;
6156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6157   PetscScalar    value;
6158   PetscErrorCode ierr;
6159 
6160   MatCheckPreallocated(mat,1);
6161   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6162 
6163 #if defined(PETSC_USE_DEBUG)
6164   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6165 #endif
6166   {
6167     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6168     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6169     PetscBool roworiented = aij->roworiented;
6170 
6171     /* Some Variables required in the macro */
6172     Mat        A                 = aij->A;
6173     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6174     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6175     MatScalar  *aa               = a->a;
6176     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6177     Mat        B                 = aij->B;
6178     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6179     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6180     MatScalar  *ba               = b->a;
6181 
6182     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6183     PetscInt  nonew = a->nonew;
6184     MatScalar *ap1,*ap2;
6185 
6186     PetscFunctionBegin;
6187     for (i=0; i<m; i++) {
6188       if (im[i] < 0) continue;
6189 #if defined(PETSC_USE_DEBUG)
6190       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6191 #endif
6192       if (im[i] >= rstart && im[i] < rend) {
6193         row      = im[i] - rstart;
6194         lastcol1 = -1;
6195         rp1      = aj + ai[row];
6196         ap1      = aa + ai[row];
6197         rmax1    = aimax[row];
6198         nrow1    = ailen[row];
6199         low1     = 0;
6200         high1    = nrow1;
6201         lastcol2 = -1;
6202         rp2      = bj + bi[row];
6203         ap2      = ba + bi[row];
6204         rmax2    = bimax[row];
6205         nrow2    = bilen[row];
6206         low2     = 0;
6207         high2    = nrow2;
6208 
6209         for (j=0; j<n; j++) {
6210           if (roworiented) value = v[i*n+j];
6211           else value = v[i+j*m];
6212           if (in[j] >= cstart && in[j] < cend) {
6213             col = in[j] - cstart;
6214             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6215             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6216           } else if (in[j] < 0) continue;
6217 #if defined(PETSC_USE_DEBUG)
6218           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6219           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6220 #endif
6221           else {
6222             if (mat->was_assembled) {
6223               if (!aij->colmap) {
6224                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6225               }
6226 #if defined(PETSC_USE_CTABLE)
6227               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6228               col--;
6229 #else
6230               col = aij->colmap[in[j]] - 1;
6231 #endif
6232               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6233               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6234                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6235                 col  =  in[j];
6236                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6237                 B     = aij->B;
6238                 b     = (Mat_SeqAIJ*)B->data;
6239                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6240                 rp2   = bj + bi[row];
6241                 ap2   = ba + bi[row];
6242                 rmax2 = bimax[row];
6243                 nrow2 = bilen[row];
6244                 low2  = 0;
6245                 high2 = nrow2;
6246                 bm    = aij->B->rmap->n;
6247                 ba    = b->a;
6248               }
6249             } else col = in[j];
6250             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6251           }
6252         }
6253       } else if (!aij->donotstash) {
6254         if (roworiented) {
6255           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6256         } else {
6257           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6258         }
6259       }
6260     }
6261   }
6262   PetscFunctionReturnVoid();
6263 }
6264