xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 55e7fe800d976e85ed2b5cd8bfdef564daa37bd9)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23    enough inodes exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
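
/*
   Example usage (a minimal sketch, not part of this file): create an AIJ matrix and call
   both preallocation routines so the same code is correct whether the communicator holds
   one process or several.  The per-row nonzero estimates (5 diagonal, 2 off-diagonal) are
   illustrative only.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          used on one process
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   used on several processes
     ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
*/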
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
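
/*
   Example (a sketch, not part of this file): the CRL variant is usually selected at runtime
   rather than hard-coded, e.g.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and then running the program with -mat_type aijcrl.
*/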
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
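
/*
   Usage sketch for the column-norm routine above (illustrative, via the public interface):
   the norms array must have one entry per global column on every process.

     PetscReal *norms;
     PetscInt   N;

     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/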
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the diagonal and off-diagonal nonzero counts */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the diagonal and off-diagonal nonzero counts */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0*/
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
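
/*
   Usage sketch for MatDistribute_MPIAIJ() above (illustrative): gmat is a square SeqAIJ matrix
   that is only significant on process 0, and m is the number of rows this process is to own in
   the distributed copy.

     Mat dmat;

     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
       ... later, to move over only fresh numerical values from process 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/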
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it it is not scalable (each processor
406 has an order N integer array but is fast to acess.
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
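
/*
   Small worked example of the colmap built above (illustrative): if the off-diagonal block B
   has garray = {3,7,12} (the global column numbers of its three local columns), then looking up
   global column 7 in colmap returns 2, and subtracting 1 gives local column 1.  A lookup that
   returns 0 (i.e. -1 after the subtraction) means the global column does not appear in B.
*/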
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
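
/*
   Ordering assumed by MatSetValuesRow_MPIAIJ() above (illustrative): v holds the existing
   nonzeros of one locally owned row in ascending global column order, so the off-diagonal
   entries to the left of the diagonal block come first, then the diagonal-block entries, then
   the off-diagonal entries to the right.  For example, with ownership range [10,20) and a row
   whose nonzero columns are {2, 11, 15, 27}:

     v[0] -> column 2    (left part, stored in B)
     v[1] -> column 11   (diagonal block A)
     v[2] -> column 15   (diagonal block A)
     v[3] -> column 27   (right part, stored in B)
*/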
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
637     The values in mat_i have to be sorted, and the values in mat_j have to be sorted within each row (CSR-like).
638     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
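
/*
   Small worked example for the symbolic copy above (illustrative): with ownership range
   cstart = 4, cend = 8 and a single local row whose CSR data is

     mat_i = {0,4},  mat_j = {1,4,6,9}

   the diagonal part receives aj = {0,2} (columns 4 and 6 shifted by cstart) with ailen[0] = 2,
   and the off-diagonal part receives bj = {1,9} with bilen[0] = 2.  Note that the off-diagonal
   column indices are still global at this point.
*/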
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
677     The values in mat_i have to be sorted, and the values in mat_j have to be sorted within each row (CSR-like).
678     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
679     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
681 */
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
690   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt       *ailen = a->ilen,*aj = a->j;
692   PetscInt       *bilen = b->ilen,*bj = b->j;
693   PetscInt       am     = aij->A->rmap->n,j;
694   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
695   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
696   PetscScalar    *aa = a->a,*ba = b->a;
697 
698   PetscFunctionBegin;
699   /* Iterate over all rows of the matrix */
700   for (j=0; j<am; j++) {
701     dnz_row = onz_row = 0;
702     rowstart_offd = full_offd_i[j];
703     rowstart_diag = full_diag_i[j];
704     /*  Iterate over all non-zero columns of the current row */
705     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
706       /* If column is in the diagonal */
707       if (mat_j[col] >= cstart && mat_j[col] < cend) {
708         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
709         aa[rowstart_diag+dnz_row] = mat_a[col];
710         dnz_row++;
711       } else { /* off-diagonal entries */
712         bj[rowstart_offd+onz_row] = mat_j[col];
713         ba[rowstart_offd+onz_row] = mat_a[col];
714         onz_row++;
715       }
716     }
717     ailen[j] = dnz_row;
718     bilen[j] = onz_row;
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
724 {
725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
726   PetscErrorCode ierr;
727   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
728   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
729 
730   PetscFunctionBegin;
731   for (i=0; i<m; i++) {
732     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
733     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
734     if (idxm[i] >= rstart && idxm[i] < rend) {
735       row = idxm[i] - rstart;
736       for (j=0; j<n; j++) {
737         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
738         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
739         if (idxn[j] >= cstart && idxn[j] < cend) {
740           col  = idxn[j] - cstart;
741           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
742         } else {
743           if (!aij->colmap) {
744             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
745           }
746 #if defined(PETSC_USE_CTABLE)
747           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
748           col--;
749 #else
750           col = aij->colmap[idxn[j]] - 1;
751 #endif
752           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
753           else {
754             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
755           }
756         }
757       }
758     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
781 {
782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
783   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
784   PetscErrorCode ierr;
785   PetscMPIInt    n;
786   PetscInt       i,j,rstart,ncols,flg;
787   PetscInt       *row,*col;
788   PetscBool      other_disassembled;
789   PetscScalar    *val;
790 
791   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
792 
793   PetscFunctionBegin;
794   if (!aij->donotstash && !mat->nooffprocentries) {
795     while (1) {
796       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
797       if (!flg) break;
798 
799       for (i=0; i<n; ) {
800         /* Now identify the consecutive vals belonging to the same row */
801         for (j=i,rstart=row[j]; j<n; j++) {
802           if (row[j] != rstart) break;
803         }
804         if (j < n) ncols = j-i;
805         else       ncols = n-i;
806         /* Now assemble all these values with a single function call */
807         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
808 
809         i = j;
810       }
811     }
812     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
813   }
814   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled; if so, we must
818      also disassemble ourselves, in order that we may reassemble. */
819   /*
820      if nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled thus we can skip this stuff
822   */
823   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
830     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
831   }
832   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
833   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
837 
838   aij->rowvalues = 0;
839 
840   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
841   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
842 
843   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
844   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
845     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
865   PetscInt      *lrows;
866   PetscInt       r, len;
867   PetscBool      cong;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
873   /* fix right hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
880     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
881     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
882     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
883   }
884   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
885   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
886   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
887   if ((diag != 0.0) && cong) {
888     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
889   } else if (diag != 0.0) {
890     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
891     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
892     for (r = 0; r < len; ++r) {
893       const PetscInt row = lrows[r] + A->rmap->rstart;
894       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
895     }
896     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898   } else {
899     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
900   }
901   ierr = PetscFree(lrows);CHKERRQ(ierr);
902 
903   /* only change matrix nonzero state if pattern was allowed to be changed */
904   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
905     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
906     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
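
/*
   Usage sketch for the row-zeroing routine above (illustrative, via the public interface):
   zero a set of global rows, put 1.0 on their diagonals, and adjust the right-hand side so that
   the solution retains the values given in x for those rows (b[row] = diag*x[row], as in the
   code above).

     PetscInt rows[] = {0, 5, 17};    global row numbers; they may be owned by any process

     ierr = MatZeroRows(A,3,rows,1.0,x,b);CHKERRQ(ierr);
*/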
910 
911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
914   PetscErrorCode    ierr;
915   PetscMPIInt       n = A->rmap->n;
916   PetscInt          i,j,r,m,p = 0,len = 0;
917   PetscInt          *lrows,*owners = A->rmap->range;
918   PetscSFNode       *rrows;
919   PetscSF           sf;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 
927   PetscFunctionBegin;
928   /* Create SF where leaves are input rows and roots are owned rows */
929   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
930   for (r = 0; r < n; ++r) lrows[r] = -1;
931   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
932   for (r = 0; r < N; ++r) {
933     const PetscInt idx   = rows[r];
934     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
935     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
936       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
937     }
938     rrows[r].rank  = p;
939     rrows[r].index = rows[r] - owners[p];
940   }
941   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
942   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
943   /* Collect flags for rows to be zeroed */
944   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
945   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
947   /* Compress and put in row numbers */
948   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
949   /* zero diagonal part of matrix */
950   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
951   /* handle off diagonal part of matrix */
952   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
953   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
954   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
955   for (i=0; i<len; i++) bb[lrows[i]] = 1;
956   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
957   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
960   if (x) {
961     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
964     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
965   }
966   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
967   /* remove zeroed rows of off diagonal matrix */
968   ii = aij->i;
969   for (i=0; i<len; i++) {
970     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
971   }
972   /* loop over all elements of off process part of matrix zeroing removed columns*/
973   if (aij->compressedrow.use) {
974     m    = aij->compressedrow.nrows;
975     ii   = aij->compressedrow.i;
976     ridx = aij->compressedrow.rindex;
977     for (i=0; i<m; i++) {
978       n  = ii[i+1] - ii[i];
979       aj = aij->j + ii[i];
980       aa = aij->a + ii[i];
981 
982       for (j=0; j<n; j++) {
983         if (PetscAbsScalar(mask[*aj])) {
984           if (b) bb[*ridx] -= *aa*xx[*aj];
985           *aa = 0.0;
986         }
987         aa++;
988         aj++;
989       }
990       ridx++;
991     }
992   } else { /* do not use compressed row format */
993     m = l->B->rmap->n;
994     for (i=0; i<m; i++) {
995       n  = ii[i+1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij->a + ii[i];
998       for (j=0; j<n; j++) {
999         if (PetscAbsScalar(mask[*aj])) {
1000           if (b) bb[i] -= *aa*xx[*aj];
1001           *aa = 0.0;
1002         }
1003         aa++;
1004         aj++;
1005       }
1006     }
1007   }
1008   if (x) {
1009     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1010     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011   }
1012   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1013   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1014   ierr = PetscFree(lrows);CHKERRQ(ierr);
1015 
1016   /* only change matrix nonzero state if pattern was allowed to be changed */
1017   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1018     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1019     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscBool      merged;
1072 
1073   PetscFunctionBegin;
1074   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1075   /* do nondiagonal part */
1076   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1077   if (!merged) {
1078     /* send it on its way */
1079     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080     /* do local part */
1081     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1082     /* receive remote parts: note this assumes the values are not actually */
1083     /* added into yy until the next line */
1084     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   } else {
1086     /* do local part */
1087     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1088     /* send it on its way */
1089     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090     /* values actually were received in the Begin() but we need to call this nop */
1091     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1097 {
1098   MPI_Comm       comm;
1099   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1100   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1101   IS             Me,Notme;
1102   PetscErrorCode ierr;
1103   PetscInt       M,N,first,last,*notme,i;
1104   PetscBool      lf;
1105   PetscMPIInt    size;
1106 
1107   PetscFunctionBegin;
1108   /* Easy test: symmetric diagonal block */
1109   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1110   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1111   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1112   if (!*f) PetscFunctionReturn(0);
1113   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1114   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1115   if (size == 1) PetscFunctionReturn(0);
1116 
1117   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1118   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1119   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1120   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1121   for (i=0; i<first; i++) notme[i] = i;
1122   for (i=last; i<M; i++) notme[i-last+first] = i;
1123   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1124   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1125   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1126   Aoff = Aoffs[0];
1127   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1128   Boff = Boffs[0];
1129   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1130   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1131   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1132   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1133   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1134   ierr = PetscFree(notme);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1139 {
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   /* do nondiagonal part */
1154   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1155   /* send it on its way */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   /* do local part */
1158   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1159   /* receive remote parts */
1160   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 /*
1165   This only works correctly for square matrices where the subblock A->A is the
1166    diagonal block
1167 */
1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1169 {
1170   PetscErrorCode ierr;
1171   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1172 
1173   PetscFunctionBegin;
1174   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1175   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1176   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1181 {
1182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1187   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1192 {
1193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197 #if defined(PETSC_USE_LOG)
1198   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1199 #endif
1200   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1201   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1202   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1203   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1204 #if defined(PETSC_USE_CTABLE)
1205   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1206 #else
1207   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1208 #endif
1209   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1210   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1211   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1212   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1213   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1214   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1215   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1226 #if defined(PETSC_HAVE_ELEMENTAL)
1227   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1228 #endif
1229 #if defined(PETSC_HAVE_HYPRE)
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1232 #endif
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1239 {
1240   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1241   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1242   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1243   PetscErrorCode ierr;
1244   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1245   int            fd;
1246   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1247   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1248   PetscScalar    *column_values;
1249   PetscInt       message_count,flowcontrolcount;
1250   FILE           *file;
1251 
1252   PetscFunctionBegin;
1253   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1254   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1255   nz   = A->nz + B->nz;
1256   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1257   if (!rank) {
1258     header[0] = MAT_FILE_CLASSID;
1259     header[1] = mat->rmap->N;
1260     header[2] = mat->cmap->N;
1261 
1262     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     /* get largest number of rows any processor has */
1265     rlen  = mat->rmap->n;
1266     range = mat->rmap->range;
1267     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1268   } else {
1269     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     rlen = mat->rmap->n;
1271   }
1272 
1273   /* load up the local row counts */
1274   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1275   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1276 
1277   /* store the row lengths to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1283       rlen = range[i+1] - range[i];
1284       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1290     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1291     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1292   }
1293   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1294 
1295   /* load up the local column indices */
1296   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1297   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1299   cnt   = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ((col = garray[B->j[j]]) > cstart) break;
1303       column_indices[cnt++] = col;
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1306     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1307   }
1308   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1309 
1310   /* store the column indices to the file */
1311   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1312   if (!rank) {
1313     MPI_Status status;
1314     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     for (i=1; i<size; i++) {
1316       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1317       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1318       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1319       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1320       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1321     }
1322     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1323   } else {
1324     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1325     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1326     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1327     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1328   }
1329   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1330 
1331   /* load up the local column values */
1332   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1333   cnt  = 0;
1334   for (i=0; i<mat->rmap->n; i++) {
1335     for (j=B->i[i]; j<B->i[i+1]; j++) {
1336       if (garray[B->j[j]] > cstart) break;
1337       column_values[cnt++] = B->a[j];
1338     }
1339     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1340     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1341   }
1342   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1343 
1344   /* store the column values to the file */
1345   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1346   if (!rank) {
1347     MPI_Status status;
1348     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1349     for (i=1; i<size; i++) {
1350       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1351       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1352       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1353       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1354       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1355     }
1356     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1357   } else {
1358     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1359     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1360     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1361     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1362   }
1363   ierr = PetscFree(column_values);CHKERRQ(ierr);
1364 
1365   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1366   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1367   PetscFunctionReturn(0);
1368 }
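
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A and an error code ierr):
   this routine is reached through MatView() when the viewer is a binary viewer, e.g.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The file name "A.dat" is only an example; the matrix can be read back with MatLoad() on a
   viewer opened with FILE_MODE_READ.
*/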
1369 
1370 #include <petscdraw.h>
1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1372 {
1373   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1374   PetscErrorCode    ierr;
1375   PetscMPIInt       rank = aij->rank,size = aij->size;
1376   PetscBool         isdraw,iascii,isbinary;
1377   PetscViewer       sviewer;
1378   PetscViewerFormat format;
1379 
1380   PetscFunctionBegin;
1381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1382   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1383   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1384   if (iascii) {
1385     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1386     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1387       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1388       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1389       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390       for (i=0; i<(PetscInt)size; i++) {
1391         nmax = PetscMax(nmax,nz[i]);
1392         nmin = PetscMin(nmin,nz[i]);
1393         navg += nz[i];
1394       }
1395       ierr = PetscFree(nz);CHKERRQ(ierr);
1396       navg = navg/size;
1397       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1398       PetscFunctionReturn(0);
1399     }
1400     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1401     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1402       MatInfo   info;
1403       PetscBool inodes;
1404 
1405       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1406       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1407       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1408       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1409       if (!inodes) {
1410         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1411                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1412       } else {
1413         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1414                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1415       }
1416       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1418       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1420       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1421       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1422       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1423       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1424       PetscFunctionReturn(0);
1425     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1426       PetscInt inodecount,inodelimit,*inodes;
1427       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1428       if (inodes) {
1429         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1430       } else {
1431         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1432       }
1433       PetscFunctionReturn(0);
1434     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1435       PetscFunctionReturn(0);
1436     }
1437   } else if (isbinary) {
1438     if (size == 1) {
1439       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1440       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1441     } else {
1442       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1443     }
1444     PetscFunctionReturn(0);
1445   } else if (isdraw) {
1446     PetscDraw draw;
1447     PetscBool isnull;
1448     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1449     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1450     if (isnull) PetscFunctionReturn(0);
1451   }
1452 
1453   {
1454     /* assemble the entire matrix onto first processor. */
1455     Mat        A;
1456     Mat_SeqAIJ *Aloc;
1457     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1458     MatScalar  *a;
1459 
1460     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1461     if (!rank) {
1462       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1463     } else {
1464       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1465     }
1466     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1467     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1468     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1469     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1470     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1471 
1472     /* copy over the A part */
1473     Aloc = (Mat_SeqAIJ*)aij->A->data;
1474     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475     row  = mat->rmap->rstart;
1476     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1477     for (i=0; i<m; i++) {
1478       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1479       row++;
1480       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1481     }
1482     aj = Aloc->j;
1483     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1484 
1485     /* copy over the B part */
1486     Aloc = (Mat_SeqAIJ*)aij->B->data;
1487     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1488     row  = mat->rmap->rstart;
1489     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1490     ct   = cols;
1491     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1492     for (i=0; i<m; i++) {
1493       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1494       row++;
1495       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1496     }
1497     ierr = PetscFree(ct);CHKERRQ(ierr);
1498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1500     /*
1501        Everyone has to participate in this call since the graphics waits are
1502        synchronized across all processors that share the PetscDraw object
1503     */
1504     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     if (!rank) {
1506       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1508     }
1509     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1510     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1511     ierr = MatDestroy(&A);CHKERRQ(ierr);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1517 {
1518   PetscErrorCode ierr;
1519   PetscBool      iascii,isdraw,issocket,isbinary;
1520 
1521   PetscFunctionBegin;
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1524   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1526   if (iascii || isdraw || isbinary || issocket) {
1527     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
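
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A and an error code ierr):
   the ASCII branch above selects its output from the viewer format, so for example the
   nonzero load balance can be printed with

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_LOAD_BALANCE);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   The same pattern with PETSC_VIEWER_ASCII_INFO or PETSC_VIEWER_ASCII_INFO_DETAIL selects the
   informational branches handled in MatView_MPIAIJ_ASCIIorDraworSocket().
*/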
1531 
1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1533 {
1534   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1535   PetscErrorCode ierr;
1536   Vec            bb1 = 0;
1537   PetscBool      hasop;
1538 
1539   PetscFunctionBegin;
1540   if (flag == SOR_APPLY_UPPER) {
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1542     PetscFunctionReturn(0);
1543   }
1544 
1545   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1546     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1547   }
1548 
1549   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554 
1555     while (its--) {
1556       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558 
1559       /* update rhs: bb1 = bb - B*x */
1560       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1561       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1562 
1563       /* local sweep */
1564       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1565     }
1566   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1567     if (flag & SOR_ZERO_INITIAL_GUESS) {
1568       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1569       its--;
1570     }
1571     while (its--) {
1572       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574 
1575       /* update rhs: bb1 = bb - B*x */
1576       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1577       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1578 
1579       /* local sweep */
1580       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1581     }
1582   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1583     if (flag & SOR_ZERO_INITIAL_GUESS) {
1584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1585       its--;
1586     }
1587     while (its--) {
1588       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590 
1591       /* update rhs: bb1 = bb - B*x */
1592       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1593       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1594 
1595       /* local sweep */
1596       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1597     }
1598   } else if (flag & SOR_EISENSTAT) {
1599     Vec xx1;
1600 
1601     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1603 
1604     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     if (!mat->diag) {
1607       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1608       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1609     }
1610     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1611     if (hasop) {
1612       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1613     } else {
1614       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1615     }
1616     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1617 
1618     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1619 
1620     /* local sweep */
1621     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1622     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1623     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1624   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1625 
1626   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1627 
1628   matin->factorerrortype = mat->A->factorerrortype;
1629   PetscFunctionReturn(0);
1630 }
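
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A, conforming vectors b and x,
   and an error code ierr): only the SOR_LOCAL_* and SOR_EISENSTAT variants are supported in
   parallel, as the SETERRQ above indicates.  Two processor-local symmetric sweeps with
   omega = 1.2 and a zero initial guess could be requested directly with

     ierr = MatSOR(A,b,1.2,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   although in practice this routine is normally reached through PCSOR.
*/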
1631 
1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1633 {
1634   Mat            aA,aB,Aperm;
1635   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1636   PetscScalar    *aa,*ba;
1637   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1638   PetscSF        rowsf,sf;
1639   IS             parcolp = NULL;
1640   PetscBool      done;
1641   PetscErrorCode ierr;
1642 
1643   PetscFunctionBegin;
1644   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1645   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1646   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1647   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1648 
1649   /* Invert row permutation to find out where my rows should go */
1650   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1651   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1652   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1654   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1655   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1656 
1657   /* Invert column permutation to find out where my columns should go */
1658   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1659   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1660   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1661   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1662   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1663   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1665 
1666   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1667   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1668   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1669 
1670   /* Find out where my gcols should go */
1671   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1672   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1674   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1675   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1676   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1677   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1678   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1679 
1680   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1681   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1682   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1683   for (i=0; i<m; i++) {
1684     PetscInt row = rdest[i],rowner;
1685     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1686     for (j=ai[i]; j<ai[i+1]; j++) {
1687       PetscInt cowner,col = cdest[aj[j]];
1688       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1689       if (rowner == cowner) dnnz[i]++;
1690       else onnz[i]++;
1691     }
1692     for (j=bi[i]; j<bi[i+1]; j++) {
1693       PetscInt cowner,col = gcdest[bj[j]];
1694       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1695       if (rowner == cowner) dnnz[i]++;
1696       else onnz[i]++;
1697     }
1698   }
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1701   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1702   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1704 
1705   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1706   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1707   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1708   for (i=0; i<m; i++) {
1709     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1710     PetscInt j0,rowlen;
1711     rowlen = ai[i+1] - ai[i];
1712     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert the values in batches of at most m */
1713       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1714       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1715     }
1716     rowlen = bi[i+1] - bi[i];
1717     for (j0=j=0; j<rowlen; j0=j) {
1718       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1719       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1720     }
1721   }
1722   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1725   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1726   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1727   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1728   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1729   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1730   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1731   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1732   *B = Aperm;
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1737 {
1738   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1743   if (ghosts) *ghosts = aij->garray;
1744   PetscFunctionReturn(0);
1745 }
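
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A and an error code ierr):
   the "ghosts" returned here are the global column indices of the off-diagonal block
   (aij->garray), which a caller can query without copying:

     PetscInt       nghost;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghost,&ghosts);CHKERRQ(ierr);

   The array remains owned by the matrix and must not be freed by the caller.
*/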
1746 
1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   Mat            A    = mat->A,B = mat->B;
1751   PetscErrorCode ierr;
1752   PetscReal      isend[5],irecv[5];
1753 
1754   PetscFunctionBegin;
1755   info->block_size = 1.0;
1756   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1757 
1758   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1759   isend[3] = info->memory;  isend[4] = info->mallocs;
1760 
1761   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1762 
1763   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764   isend[3] += info->memory;  isend[4] += info->mallocs;
1765   if (flag == MAT_LOCAL) {
1766     info->nz_used      = isend[0];
1767     info->nz_allocated = isend[1];
1768     info->nz_unneeded  = isend[2];
1769     info->memory       = isend[3];
1770     info->mallocs      = isend[4];
1771   } else if (flag == MAT_GLOBAL_MAX) {
1772     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1773 
1774     info->nz_used      = irecv[0];
1775     info->nz_allocated = irecv[1];
1776     info->nz_unneeded  = irecv[2];
1777     info->memory       = irecv[3];
1778     info->mallocs      = irecv[4];
1779   } else if (flag == MAT_GLOBAL_SUM) {
1780     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1781 
1782     info->nz_used      = irecv[0];
1783     info->nz_allocated = irecv[1];
1784     info->nz_unneeded  = irecv[2];
1785     info->memory       = irecv[3];
1786     info->mallocs      = irecv[4];
1787   }
1788   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1789   info->fill_ratio_needed = 0;
1790   info->factor_mallocs    = 0;
1791   PetscFunctionReturn(0);
1792 }
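
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A and an error code ierr):
   MAT_LOCAL returns the per-process counts computed above, while MAT_GLOBAL_MAX and
   MAT_GLOBAL_SUM reduce them across the communicator, e.g.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"total nonzeros used %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/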
1793 
1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1795 {
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   switch (op) {
1801   case MAT_NEW_NONZERO_LOCATIONS:
1802   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1803   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1804   case MAT_KEEP_NONZERO_PATTERN:
1805   case MAT_NEW_NONZERO_LOCATION_ERR:
1806   case MAT_USE_INODES:
1807   case MAT_IGNORE_ZERO_ENTRIES:
1808     MatCheckPreallocated(A,1);
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_ROW_ORIENTED:
1813     MatCheckPreallocated(A,1);
1814     a->roworiented = flg;
1815 
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_NEW_DIAGONALS:
1820     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1821     break;
1822   case MAT_IGNORE_OFF_PROC_ENTRIES:
1823     a->donotstash = flg;
1824     break;
1825   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1826   case MAT_SPD:
1827   case MAT_SYMMETRIC:
1828   case MAT_STRUCTURALLY_SYMMETRIC:
1829   case MAT_HERMITIAN:
1830   case MAT_SYMMETRY_ETERNAL:
1831     break;
1832   case MAT_SUBMAT_SINGLEIS:
1833     A->submat_singleis = flg;
1834     break;
1835   case MAT_STRUCTURE_ONLY:
1836     /* The option is handled directly by MatSetOption() */
1837     break;
1838   default:
1839     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1840   }
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1845 {
1846   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1847   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1848   PetscErrorCode ierr;
1849   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1850   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1851   PetscInt       *cmap,*idx_p;
1852 
1853   PetscFunctionBegin;
1854   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1855   mat->getrowactive = PETSC_TRUE;
1856 
1857   if (!mat->rowvalues && (idx || v)) {
1858     /*
1859         allocate enough space to hold information from the longest row.
1860     */
1861     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1862     PetscInt   max = 1,tmp;
1863     for (i=0; i<matin->rmap->n; i++) {
1864       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1865       if (max < tmp) max = tmp;
1866     }
1867     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1868   }
1869 
1870   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1871   lrow = row - rstart;
1872 
1873   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1874   if (!v)   {pvA = 0; pvB = 0;}
1875   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1876   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1877   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1878   nztot = nzA + nzB;
1879 
1880   cmap = mat->garray;
1881   if (v  || idx) {
1882     if (nztot) {
1883       /* Sort by increasing column numbers, assuming A and B already sorted */
1884       PetscInt imark = -1;
1885       if (v) {
1886         *v = v_p = mat->rowvalues;
1887         for (i=0; i<nzB; i++) {
1888           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1889           else break;
1890         }
1891         imark = i;
1892         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1893         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1894       }
1895       if (idx) {
1896         *idx = idx_p = mat->rowindices;
1897         if (imark > -1) {
1898           for (i=0; i<imark; i++) {
1899             idx_p[i] = cmap[cworkB[i]];
1900           }
1901         } else {
1902           for (i=0; i<nzB; i++) {
1903             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1904             else break;
1905           }
1906           imark = i;
1907         }
1908         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1909         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1910       }
1911     } else {
1912       if (idx) *idx = 0;
1913       if (v)   *v   = 0;
1914     }
1915   }
1916   *nz  = nztot;
1917   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1918   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1923 {
1924   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1925 
1926   PetscFunctionBegin;
1927   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1928   aij->getrowactive = PETSC_FALSE;
1929   PetscFunctionReturn(0);
1930 }
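
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A and an error code ierr):
   MatGetRow() may only be asked for locally owned rows, and each call must be matched by
   MatRestoreRow() before the next row is requested (the getrowactive flag above enforces this):

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (examine cols[0..ncols-1] and vals[0..ncols-1] here)
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/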
1931 
1932 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1933 {
1934   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1935   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1936   PetscErrorCode ierr;
1937   PetscInt       i,j,cstart = mat->cmap->rstart;
1938   PetscReal      sum = 0.0;
1939   MatScalar      *v;
1940 
1941   PetscFunctionBegin;
1942   if (aij->size == 1) {
1943     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1944   } else {
1945     if (type == NORM_FROBENIUS) {
1946       v = amat->a;
1947       for (i=0; i<amat->nz; i++) {
1948         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1949       }
1950       v = bmat->a;
1951       for (i=0; i<bmat->nz; i++) {
1952         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1953       }
1954       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1955       *norm = PetscSqrtReal(*norm);
1956       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1957     } else if (type == NORM_1) { /* max column norm */
1958       PetscReal *tmp,*tmp2;
1959       PetscInt  *jj,*garray = aij->garray;
1960       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1961       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1962       *norm = 0.0;
1963       v     = amat->a; jj = amat->j;
1964       for (j=0; j<amat->nz; j++) {
1965         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1966       }
1967       v = bmat->a; jj = bmat->j;
1968       for (j=0; j<bmat->nz; j++) {
1969         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1970       }
1971       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1972       for (j=0; j<mat->cmap->N; j++) {
1973         if (tmp2[j] > *norm) *norm = tmp2[j];
1974       }
1975       ierr = PetscFree(tmp);CHKERRQ(ierr);
1976       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1977       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1978     } else if (type == NORM_INFINITY) { /* max row norm */
1979       PetscReal ntemp = 0.0;
1980       for (j=0; j<aij->A->rmap->n; j++) {
1981         v   = amat->a + amat->i[j];
1982         sum = 0.0;
1983         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1984           sum += PetscAbsScalar(*v); v++;
1985         }
1986         v = bmat->a + bmat->i[j];
1987         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1988           sum += PetscAbsScalar(*v); v++;
1989         }
1990         if (sum > ntemp) ntemp = sum;
1991       }
1992       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1993       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1994     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1995   }
1996   PetscFunctionReturn(0);
1997 }
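
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A and an error code ierr):
   NORM_FROBENIUS, NORM_1, and NORM_INFINITY are supported above; NORM_2 is not.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"Frobenius norm %g\n",(double)nrm);CHKERRQ(ierr);
*/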
1998 
1999 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2000 {
2001   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2002   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2003   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2004   PetscErrorCode ierr;
2005   Mat            B,A_diag,*B_diag;
2006   MatScalar      *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2060   very quickly (i.e., without using MatSetValues()), because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
2064   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
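
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A, a Mat At, and an error code
   ierr): the three MatReuse modes handled above correspond to the usual calling sequences

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    creates At = A^T
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);      refills an existing At
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);     replaces A by A^T

   where MAT_REUSE_MATRIX requires that At was previously created by MatTranspose() from a
   matrix with the same nonzero pattern.
*/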
2086 
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2106   }
2107   /* scale the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
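
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A and an error code ierr):
   the left vector must conform to the row layout and the right vector to the column layout,
   which MatCreateVecs() provides directly:

     Vec l,r;
     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);

   This computes A <- diag(l) A diag(r); passing NULL for either vector skips that side.
*/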
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* because of the column compression in the off-processor part of the matrix a->B,
2157        the number of columns in a->B and b->B may be different, hence we cannot call
2158        MatCopy() directly on the two parts. If need be, we can provide a more
2159        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2160        then copying the submatrices */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2237     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2238   } else {
2239     Mat      B;
2240     PetscInt *nnz_d,*nnz_o;
2241     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2242     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2243     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2244     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2245     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2246     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2247     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2248     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2250     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2251     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2252     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2253     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2255   }
2256   PetscFunctionReturn(0);
2257 }
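
/*
   Usage sketch (illustrative; assumes assembled MPIAIJ matrices X and Y with the same sizes and
   an error code ierr): the cheap BLAS path above is taken only for SAME_NONZERO_PATTERN;
   otherwise Y is updated through MatAXPY_Basic() or rebuilt with fresh preallocation.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   This computes Y <- Y + 2*X; pass SAME_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN instead
   whenever those stronger guarantees hold, since they avoid rebuilding Y.
*/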
2258 
2259 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2260 
2261 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2262 {
2263 #if defined(PETSC_USE_COMPLEX)
2264   PetscErrorCode ierr;
2265   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2269   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2270 #else
2271   PetscFunctionBegin;
2272 #endif
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2277 {
2278   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2283   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2288 {
2289   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2294   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2299 {
2300   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2301   PetscErrorCode ierr;
2302   PetscInt       i,*idxb = 0;
2303   PetscScalar    *va,*vb;
2304   Vec            vtmp;
2305 
2306   PetscFunctionBegin;
2307   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2308   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2309   if (idx) {
2310     for (i=0; i<A->rmap->n; i++) {
2311       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2312     }
2313   }
2314 
2315   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2316   if (idx) {
2317     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2318   }
2319   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2320   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2321 
2322   for (i=0; i<A->rmap->n; i++) {
2323     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2324       va[i] = vb[i];
2325       if (idx) idx[i] = a->garray[idxb[i]];
2326     }
2327   }
2328 
2329   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2330   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2331   ierr = PetscFree(idxb);CHKERRQ(ierr);
2332   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
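
/*
   Usage sketch (illustrative; assumes an assembled MPIAIJ matrix A and an error code ierr):
   the result vector must share the matrix row layout, and the optional idx[] array receives
   the global column of each row's largest-magnitude entry:

     Vec      rowmax;
     PetscInt m,*loc;
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/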
2335 
2336 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2337 {
2338   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2339   PetscErrorCode ierr;
2340   PetscInt       i,*idxb = 0;
2341   PetscScalar    *va,*vb;
2342   Vec            vtmp;
2343 
2344   PetscFunctionBegin;
2345   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2346   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2347   if (idx) {
2348     for (i=0; i<A->rmap->n; i++) {
2349       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2350     }
2351   }
2352 
2353   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2354   if (idx) {
2355     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2356   }
2357   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2358   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2359 
2360   for (i=0; i<A->rmap->n; i++) {
2361     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2362       va[i] = vb[i];
2363       if (idx) idx[i] = a->garray[idxb[i]];
2364     }
2365   }
2366 
2367   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2368   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2369   ierr = PetscFree(idxb);CHKERRQ(ierr);
2370   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2375 {
2376   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2377   PetscInt       n      = A->rmap->n;
2378   PetscInt       cstart = A->cmap->rstart;
2379   PetscInt       *cmap  = mat->garray;
2380   PetscInt       *diagIdx, *offdiagIdx;
2381   Vec            diagV, offdiagV;
2382   PetscScalar    *a, *diagA, *offdiagA;
2383   PetscInt       r;
2384   PetscErrorCode ierr;
2385 
2386   PetscFunctionBegin;
2387   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2388   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2389   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2390   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2391   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2392   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2393   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2394   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2395   for (r = 0; r < n; ++r) {
2396     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2397       a[r]   = diagA[r];
2398       idx[r] = cstart + diagIdx[r];
2399     } else {
2400       a[r]   = offdiagA[r];
2401       idx[r] = cmap[offdiagIdx[r]];
2402     }
2403   }
2404   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2405   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2406   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2407   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2408   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2409   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2414 {
2415   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2416   PetscInt       n      = A->rmap->n;
2417   PetscInt       cstart = A->cmap->rstart;
2418   PetscInt       *cmap  = mat->garray;
2419   PetscInt       *diagIdx, *offdiagIdx;
2420   Vec            diagV, offdiagV;
2421   PetscScalar    *a, *diagA, *offdiagA;
2422   PetscInt       r;
2423   PetscErrorCode ierr;
2424 
2425   PetscFunctionBegin;
2426   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2427   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2428   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2429   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2430   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2432   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2433   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2434   for (r = 0; r < n; ++r) {
2435     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2436       a[r]   = diagA[r];
2437       idx[r] = cstart + diagIdx[r];
2438     } else {
2439       a[r]   = offdiagA[r];
2440       idx[r] = cmap[offdiagIdx[r]];
2441     }
2442   }
2443   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2444   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2445   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2446   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2447   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2448   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2453 {
2454   PetscErrorCode ierr;
2455   Mat            *dummy;
2456 
2457   PetscFunctionBegin;
2458   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2459   *newmat = *dummy;
2460   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2465 {
2466   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2467   PetscErrorCode ierr;
2468 
2469   PetscFunctionBegin;
2470   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2471   A->factorerrortype = a->A->factorerrortype;
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2476 {
2477   PetscErrorCode ierr;
2478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2479 
2480   PetscFunctionBegin;
2481   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2482   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2483   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2484   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2489 {
2490   PetscFunctionBegin;
2491   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2492   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 /*@
2497    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2498 
2499    Collective on Mat
2500 
2501    Input Parameters:
2502 +    A - the matrix
2503 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2504 
2505    Level: advanced
2506 
2507 @*/
2508 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2509 {
2510   PetscErrorCode       ierr;
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
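
/*
   Usage sketch (illustrative; assumes an MPIAIJ matrix A and an error code ierr): the scalable
   overlap algorithm can be selected programmatically,

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or from the options database with -mat_increase_overlap_scalable (handled in
   MatSetFromOptions_MPIAIJ() below); subsequent MatIncreaseOverlap() calls then use
   MatIncreaseOverlap_MPIAIJ_Scalable().
*/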
2516 
2517 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2518 {
2519   PetscErrorCode       ierr;
2520   PetscBool            sc = PETSC_FALSE,flg;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2524   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2525   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2526   if (flg) {
2527     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2528   }
2529   ierr = PetscOptionsTail();CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2534 {
2535   PetscErrorCode ierr;
2536   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2537   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2538 
2539   PetscFunctionBegin;
2540   if (!Y->preallocated) {
2541     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2542   } else if (!aij->nz) {
2543     PetscInt nonew = aij->nonew;
2544     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2545     aij->nonew = nonew;
2546   }
2547   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2552 {
2553   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2554   PetscErrorCode ierr;
2555 
2556   PetscFunctionBegin;
2557   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2558   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2559   if (d) {
2560     PetscInt rstart;
2561     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2562     *d += rstart;
2563 
2564   }
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2569 {
2570   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2571   PetscErrorCode ierr;
2572 
2573   PetscFunctionBegin;
2574   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 /* -------------------------------------------------------------------*/
2579 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2580                                        MatGetRow_MPIAIJ,
2581                                        MatRestoreRow_MPIAIJ,
2582                                        MatMult_MPIAIJ,
2583                                 /* 4*/ MatMultAdd_MPIAIJ,
2584                                        MatMultTranspose_MPIAIJ,
2585                                        MatMultTransposeAdd_MPIAIJ,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*10*/ 0,
2590                                        0,
2591                                        0,
2592                                        MatSOR_MPIAIJ,
2593                                        MatTranspose_MPIAIJ,
2594                                 /*15*/ MatGetInfo_MPIAIJ,
2595                                        MatEqual_MPIAIJ,
2596                                        MatGetDiagonal_MPIAIJ,
2597                                        MatDiagonalScale_MPIAIJ,
2598                                        MatNorm_MPIAIJ,
2599                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2600                                        MatAssemblyEnd_MPIAIJ,
2601                                        MatSetOption_MPIAIJ,
2602                                        MatZeroEntries_MPIAIJ,
2603                                 /*24*/ MatZeroRows_MPIAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                 /*29*/ MatSetUp_MPIAIJ,
2609                                        0,
2610                                        0,
2611                                        MatGetDiagonalBlock_MPIAIJ,
2612                                        0,
2613                                 /*34*/ MatDuplicate_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*39*/ MatAXPY_MPIAIJ,
2619                                        MatCreateSubMatrices_MPIAIJ,
2620                                        MatIncreaseOverlap_MPIAIJ,
2621                                        MatGetValues_MPIAIJ,
2622                                        MatCopy_MPIAIJ,
2623                                 /*44*/ MatGetRowMax_MPIAIJ,
2624                                        MatScale_MPIAIJ,
2625                                        MatShift_MPIAIJ,
2626                                        MatDiagonalSet_MPIAIJ,
2627                                        MatZeroRowsColumns_MPIAIJ,
2628                                 /*49*/ MatSetRandom_MPIAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2634                                        0,
2635                                        MatSetUnfactored_MPIAIJ,
2636                                        MatPermute_MPIAIJ,
2637                                        0,
2638                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2639                                        MatDestroy_MPIAIJ,
2640                                        MatView_MPIAIJ,
2641                                        0,
2642                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2643                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2644                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2649                                        MatGetRowMinAbs_MPIAIJ,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*75*/ MatFDColoringApply_AIJ,
2655                                        MatSetFromOptions_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        MatFindZeroDiagonals_MPIAIJ,
2659                                 /*80*/ 0,
2660                                        0,
2661                                        0,
2662                                 /*83*/ MatLoad_MPIAIJ,
2663                                        MatIsSymmetric_MPIAIJ,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2669                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2670                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2671                                        MatPtAP_MPIAIJ_MPIAIJ,
2672                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2673                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                 /*99*/ 0,
2679                                        0,
2680                                        0,
2681                                        MatConjugate_MPIAIJ,
2682                                        0,
2683                                 /*104*/MatSetValuesRow_MPIAIJ,
2684                                        MatRealPart_MPIAIJ,
2685                                        MatImaginaryPart_MPIAIJ,
2686                                        0,
2687                                        0,
2688                                 /*109*/0,
2689                                        0,
2690                                        MatGetRowMin_MPIAIJ,
2691                                        0,
2692                                        MatMissingDiagonal_MPIAIJ,
2693                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2694                                        0,
2695                                        MatGetGhosts_MPIAIJ,
2696                                        0,
2697                                        0,
2698                                 /*119*/0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        MatGetMultiProcBlock_MPIAIJ,
2703                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2704                                        MatGetColumnNorms_MPIAIJ,
2705                                        MatInvertBlockDiagonal_MPIAIJ,
2706                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2707                                        MatCreateSubMatricesMPI_MPIAIJ,
2708                                 /*129*/0,
2709                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2710                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2711                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2712                                        0,
2713                                 /*134*/0,
2714                                        0,
2715                                        MatRARt_MPIAIJ_MPIAIJ,
2716                                        0,
2717                                        0,
2718                                 /*139*/MatSetBlockSizes_MPIAIJ,
2719                                        0,
2720                                        0,
2721                                        MatFDColoringSetUp_MPIXAIJ,
2722                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2723                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2724 };
2725 
2726 /* ----------------------------------------------------------------------------------------*/
2727 
2728 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2729 {
2730   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2731   PetscErrorCode ierr;
2732 
2733   PetscFunctionBegin;
2734   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2735   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2740 {
2741   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2746   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2751 {
2752   Mat_MPIAIJ     *b;
2753   PetscErrorCode ierr;
2754 
2755   PetscFunctionBegin;
2756   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2757   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2758   b = (Mat_MPIAIJ*)B->data;
2759 
2760 #if defined(PETSC_USE_CTABLE)
2761   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2762 #else
2763   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2764 #endif
2765   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2766   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2767   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2768 
2769   /* Because B will have been resized we simply destroy it and create a new one each time */
2770   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2771   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2772   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2773   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2774   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2775   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2776 
2777   if (!B->preallocated) {
2778     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2779     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2780     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2781     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2782     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2783   }
2784 
2785   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2786   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2787   B->preallocated  = PETSC_TRUE;
2788   B->was_assembled = PETSC_FALSE;
2789   B->assembled     = PETSC_FALSE;
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2794 {
2795   Mat_MPIAIJ     *b;
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2800   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2802   b = (Mat_MPIAIJ*)B->data;
2803 
2804 #if defined(PETSC_USE_CTABLE)
2805   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2806 #else
2807   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2808 #endif
2809   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2810   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2811   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2812 
2813   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2814   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2815   B->preallocated  = PETSC_TRUE;
2816   B->was_assembled = PETSC_FALSE;
2817   B->assembled = PETSC_FALSE;
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2822 {
2823   Mat            mat;
2824   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   *newmat = 0;
2829   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2830   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2831   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2832   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2833   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2834   a       = (Mat_MPIAIJ*)mat->data;
2835 
2836   mat->factortype   = matin->factortype;
2837   mat->assembled    = PETSC_TRUE;
2838   mat->insertmode   = NOT_SET_VALUES;
2839   mat->preallocated = PETSC_TRUE;
2840 
2841   a->size         = oldmat->size;
2842   a->rank         = oldmat->rank;
2843   a->donotstash   = oldmat->donotstash;
2844   a->roworiented  = oldmat->roworiented;
2845   a->rowindices   = 0;
2846   a->rowvalues    = 0;
2847   a->getrowactive = PETSC_FALSE;
2848 
2849   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2850   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2851 
2852   if (oldmat->colmap) {
2853 #if defined(PETSC_USE_CTABLE)
2854     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2855 #else
2856     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2857     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2858     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2859 #endif
2860   } else a->colmap = 0;
2861   if (oldmat->garray) {
2862     PetscInt len;
2863     len  = oldmat->B->cmap->n;
2864     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2865     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2866     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2867   } else a->garray = 0;
2868 
2869   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2870   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2871   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2872   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2873 
2874   if (oldmat->Mvctx_mpi1) {
2875     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2876     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2877   }
2878 
2879   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2880   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2881   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2882   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2883   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2884   *newmat = mat;
2885   PetscFunctionReturn(0);
2886 }
2887 
2888 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2889 {
2890   PetscBool      isbinary, ishdf5;
2891   PetscErrorCode ierr;
2892 
2893   PetscFunctionBegin;
2894   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2895   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2896   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2897   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2898   if (isbinary) {
2899     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2900   } else if (ishdf5) {
2901 #if defined(PETSC_HAVE_HDF5)
2902     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2903 #else
2904     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2905 #endif
2906   } else {
2907     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2908   }
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2913 {
2914   PetscScalar    *vals,*svals;
2915   MPI_Comm       comm;
2916   PetscErrorCode ierr;
2917   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2918   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2919   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2920   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2921   PetscInt       cend,cstart,n,*rowners;
2922   int            fd;
2923   PetscInt       bs = newMat->rmap->bs;
2924 
2925   PetscFunctionBegin;
2926   /* force binary viewer to load .info file if it has not yet done so */
2927   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2928   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2929   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2930   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2931   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2932   if (!rank) {
2933     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2934     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2935     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2936   }
2937 
2938   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2939   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2940   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2941   if (bs < 0) bs = 1;
2942 
2943   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2944   M    = header[1]; N = header[2];
2945 
2946   /* If global sizes are set, check if they are consistent with that given in the file */
2947   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2948   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2949 
2950   /* determine ownership of all (block) rows */
2951   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2952   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2953   else m = newMat->rmap->n; /* Set by user */
2954 
2955   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2956   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2957 
2958   /* First process needs enough room for process with most rows */
2959   if (!rank) {
2960     mmax = rowners[1];
2961     for (i=2; i<=size; i++) {
2962       mmax = PetscMax(mmax, rowners[i]);
2963     }
2964   } else mmax = -1;             /* unused, but compilers complain */
2965 
2966   rowners[0] = 0;
2967   for (i=2; i<=size; i++) {
2968     rowners[i] += rowners[i-1];
2969   }
2970   rstart = rowners[rank];
2971   rend   = rowners[rank+1];
2972 
2973   /* distribute row lengths to all processors */
2974   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2975   if (!rank) {
2976     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2977     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2978     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2979     for (j=0; j<m; j++) {
2980       procsnz[0] += ourlens[j];
2981     }
2982     for (i=1; i<size; i++) {
2983       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2984       /* calculate the number of nonzeros on each processor */
2985       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2986         procsnz[i] += rowlengths[j];
2987       }
2988       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2989     }
2990     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2991   } else {
2992     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2993   }
2994 
2995   if (!rank) {
2996     /* determine max buffer needed and allocate it */
2997     maxnz = 0;
2998     for (i=0; i<size; i++) {
2999       maxnz = PetscMax(maxnz,procsnz[i]);
3000     }
3001     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3002 
3003     /* read in my part of the matrix column indices  */
3004     nz   = procsnz[0];
3005     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3006     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3007 
3008     /* read in everyone else's part and ship it off */
3009     for (i=1; i<size; i++) {
3010       nz   = procsnz[i];
3011       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3012       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3013     }
3014     ierr = PetscFree(cols);CHKERRQ(ierr);
3015   } else {
3016     /* determine buffer space needed for message */
3017     nz = 0;
3018     for (i=0; i<m; i++) {
3019       nz += ourlens[i];
3020     }
3021     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3022 
3023     /* receive message of column indices */
3024     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3025   }
3026 
3027   /* determine column ownership if matrix is not square */
3028   if (N != M) {
3029     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3030     else n = newMat->cmap->n;
3031     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3032     cstart = cend - n;
3033   } else {
3034     cstart = rstart;
3035     cend   = rend;
3036     n      = cend - cstart;
3037   }
3038 
3039   /* loop over local rows, determining number of off-diagonal entries */
3040   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3041   jj   = 0;
3042   for (i=0; i<m; i++) {
3043     for (j=0; j<ourlens[i]; j++) {
3044       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3045       jj++;
3046     }
3047   }
3048 
3049   for (i=0; i<m; i++) {
3050     ourlens[i] -= offlens[i];
3051   }
3052   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3053 
3054   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3055 
3056   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3057 
3058   for (i=0; i<m; i++) {
3059     ourlens[i] += offlens[i];
3060   }
3061 
3062   if (!rank) {
3063     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3064 
3065     /* read in my part of the matrix numerical values  */
3066     nz   = procsnz[0];
3067     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3068 
3069     /* insert into matrix */
3070     jj      = rstart;
3071     smycols = mycols;
3072     svals   = vals;
3073     for (i=0; i<m; i++) {
3074       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3075       smycols += ourlens[i];
3076       svals   += ourlens[i];
3077       jj++;
3078     }
3079 
3080     /* read in other processors and ship out */
3081     for (i=1; i<size; i++) {
3082       nz   = procsnz[i];
3083       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3084       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3085     }
3086     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3087   } else {
3088     /* receive numeric values */
3089     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3090 
3091     /* receive message of values */
3092     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3093 
3094     /* insert into matrix */
3095     jj      = rstart;
3096     smycols = mycols;
3097     svals   = vals;
3098     for (i=0; i<m; i++) {
3099       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3100       smycols += ourlens[i];
3101       svals   += ourlens[i];
3102       jj++;
3103     }
3104   }
3105   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3106   ierr = PetscFree(vals);CHKERRQ(ierr);
3107   ierr = PetscFree(mycols);CHKERRQ(ierr);
3108   ierr = PetscFree(rowners);CHKERRQ(ierr);
3109   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3110   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3111   PetscFunctionReturn(0);
3112 }
3113 
3114 /* Not scalable because of ISAllGather() unless getting all columns. */
3115 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3116 {
3117   PetscErrorCode ierr;
3118   IS             iscol_local;
3119   PetscBool      isstride;
3120   PetscMPIInt    lisstride=0,gisstride;
3121 
3122   PetscFunctionBegin;
3123   /* check if we are grabbing all columns */
3124   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3125 
3126   if (isstride) {
3127     PetscInt  start,len,mstart,mlen;
3128     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3129     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3130     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3131     if (mstart == start && mlen-mstart == len) lisstride = 1;
3132   }
3133 
3134   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3135   if (gisstride) {
3136     PetscInt N;
3137     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3138     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3139     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3140     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3141   } else {
3142     PetscInt cbs;
3143     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3144     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3145     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3146   }
3147 
3148   *isseq = iscol_local;
3149   PetscFunctionReturn(0);
3150 }
3151 
3152 /*
3153  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3154  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3155 
3156  Input Parameters:
3157    mat - matrix
3158    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3159            i.e., mat->rstart <= isrow[i] < mat->rend
3160    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3161            i.e., mat->cstart <= iscol[i] < mat->cend
3162  Output Parameters:
3163    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3164    iscol_o - sequential column index set for retrieving mat->B
3165    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3166  */
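/* A small hypothetical example: with two processes, process 0 owning global columns {0,1} and
   process 1 owning {2,3}, let iscol select column 1 on process 0 and column 2 on process 1.
   On process 0, iscol_d = {1}, the local column of mat->A; if mat->B on process 0 has a column
   corresponding to global column 2, then iscol_o contains that local column index of mat->B and
   the matching garray entry is 1, the position of global column 2 in the concatenated iscol. */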
3167 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3168 {
3169   PetscErrorCode ierr;
3170   Vec            x,cmap;
3171   const PetscInt *is_idx;
3172   PetscScalar    *xarray,*cmaparray;
3173   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3174   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3175   Mat            B=a->B;
3176   Vec            lvec=a->lvec,lcmap;
3177   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3178   MPI_Comm       comm;
3179   VecScatter     Mvctx=a->Mvctx;
3180 
3181   PetscFunctionBegin;
3182   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3183   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3184 
3185   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3186   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3187   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3188   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3189   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3190 
3191   /* Get start indices */
3192   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3193   isstart -= ncols;
3194   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3195 
3196   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3197   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3198   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3199   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3200   for (i=0; i<ncols; i++) {
3201     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3202     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3203     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3204   }
3205   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3206   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3207   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3208 
3209   /* Get iscol_d */
3210   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3211   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3212   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3213 
3214   /* Get isrow_d */
3215   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3216   rstart = mat->rmap->rstart;
3217   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3218   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3219   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3220   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3221 
3222   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3223   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3224   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3225 
3226   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3227   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3228   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3229 
3230   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3231 
3232   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3233   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3234 
3235   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3236   /* off-process column indices */
3237   count = 0;
3238   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3239   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3240 
3241   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3242   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3243   for (i=0; i<Bn; i++) {
3244     if (PetscRealPart(xarray[i]) > -1.0) {
3245       idx[count]     = i;                   /* local column index in off-diagonal part B */
3246       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3247       count++;
3248     }
3249   }
3250   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3251   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3252 
3253   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3254   /* cannot ensure iscol_o has same blocksize as iscol! */
3255 
3256   ierr = PetscFree(idx);CHKERRQ(ierr);
3257   *garray = cmap1;
3258 
3259   ierr = VecDestroy(&x);CHKERRQ(ierr);
3260   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3261   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3266 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3267 {
3268   PetscErrorCode ierr;
3269   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3270   Mat            M = NULL;
3271   MPI_Comm       comm;
3272   IS             iscol_d,isrow_d,iscol_o;
3273   Mat            Asub = NULL,Bsub = NULL;
3274   PetscInt       n;
3275 
3276   PetscFunctionBegin;
3277   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3278 
3279   if (call == MAT_REUSE_MATRIX) {
3280     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3281     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3282     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3283 
3284     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3285     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3286 
3287     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3288     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3289 
3290     /* Update diagonal and off-diagonal portions of submat */
3291     asub = (Mat_MPIAIJ*)(*submat)->data;
3292     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3293     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3294     if (n) {
3295       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3296     }
3297     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3298     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3299   } else { /* call == MAT_INITIAL_MATRIX */
3300   } else { /* call == MAT_INITIAL_MATRIX) */
3301     const PetscInt *garray;
3302     PetscInt        BsubN;
3303 
3304     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3305     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3306 
3307     /* Create local submatrices Asub and Bsub */
3308     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3309     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3310 
3311     /* Create submatrix M */
3312     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3313 
3314     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3315     asub = (Mat_MPIAIJ*)M->data;
3316 
3317     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3318     n = asub->B->cmap->N;
3319     if (BsubN > n) {
3320       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3321       const PetscInt *idx;
3322       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3323       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3324 
3325       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3326       j = 0;
3327       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3328       for (i=0; i<n; i++) {
3329         if (j >= BsubN) break;
3330         while (subgarray[i] > garray[j]) j++;
3331 
3332         if (subgarray[i] == garray[j]) {
3333           idx_new[i] = idx[j++];
3334         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3335       }
3336       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3337 
3338       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3339       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3340 
3341     } else if (BsubN < n) {
3342       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3343     }
3344 
3345     ierr = PetscFree(garray);CHKERRQ(ierr);
3346     *submat = M;
3347 
3348     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3349     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3350     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3351 
3352     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3353     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3354 
3355     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3356     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3357   }
3358   PetscFunctionReturn(0);
3359 }
3360 
3361 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3362 {
3363   PetscErrorCode ierr;
3364   IS             iscol_local=NULL,isrow_d;
3365   PetscInt       csize;
3366   PetscInt       n,i,j,start,end;
3367   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3368   MPI_Comm       comm;
3369 
3370   PetscFunctionBegin;
3371   /* If isrow has same processor distribution as mat,
3372      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3373   if (call == MAT_REUSE_MATRIX) {
3374     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3375     if (isrow_d) {
3376       sameRowDist  = PETSC_TRUE;
3377       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3378     } else {
3379       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3380       if (iscol_local) {
3381         sameRowDist  = PETSC_TRUE;
3382         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3383       }
3384     }
3385   } else {
3386     /* Check if isrow has same processor distribution as mat */
3387     sameDist[0] = PETSC_FALSE;
3388     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3389     if (!n) {
3390       sameDist[0] = PETSC_TRUE;
3391     } else {
3392       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3393       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3394       if (i >= start && j < end) {
3395         sameDist[0] = PETSC_TRUE;
3396       }
3397     }
3398 
3399     /* Check if iscol has same processor distribution as mat */
3400     sameDist[1] = PETSC_FALSE;
3401     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3402     if (!n) {
3403       sameDist[1] = PETSC_TRUE;
3404     } else {
3405       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3406       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3407       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3408     }
3409 
3410     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3411     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3412     sameRowDist = tsameDist[0];
3413   }
3414 
3415   if (sameRowDist) {
3416     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3417       /* isrow and iscol have same processor distribution as mat */
3418       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3419       PetscFunctionReturn(0);
3420     } else { /* sameRowDist */
3421       /* isrow has same processor distribution as mat */
3422       if (call == MAT_INITIAL_MATRIX) {
3423         PetscBool sorted;
3424         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3425         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3426         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3427         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3428 
3429         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3430         if (sorted) {
3431           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3432           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3433           PetscFunctionReturn(0);
3434         }
3435       } else { /* call == MAT_REUSE_MATRIX */
3436         IS    iscol_sub;
3437         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3438         if (iscol_sub) {
3439           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3440           PetscFunctionReturn(0);
3441         }
3442       }
3443     }
3444   }
3445 
3446   /* General case: iscol -> iscol_local which has global size of iscol */
3447   if (call == MAT_REUSE_MATRIX) {
3448     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3449     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3450   } else {
3451     if (!iscol_local) {
3452       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3453     }
3454   }
3455 
3456   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3457   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3458 
3459   if (call == MAT_INITIAL_MATRIX) {
3460     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3461     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3462   }
3463   PetscFunctionReturn(0);
3464 }
3465 
3466 /*@C
3467      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3468          and "off-diagonal" parts of the matrix in CSR format.
3469 
3470    Collective on MPI_Comm
3471 
3472    Input Parameters:
3473 +  comm - MPI communicator
3474 .  A - "diagonal" portion of matrix
3475 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3476 -  garray - global index of B columns
3477 
3478    Output Parameter:
3479 .   mat - the matrix, with input A as its local diagonal matrix
3480    Level: advanced
3481 
3482    Notes:
3483        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
3484        A becomes part of the output mat and B is destroyed by this routine; the user cannot use A or B afterwards.
3485 
3486 .seealso: MatCreateMPIAIJWithSplitArrays()
3487 @*/
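/* A minimal calling sketch (names are those used later in this file; Asub and Bsub are SeqAIJ
   matrices with the same number of local rows, and garray[] maps the columns of Bsub to global
   column indices):

     Mat M;
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

   Asub becomes the diagonal block of M and Bsub is destroyed by the call, so neither may be used
   afterwards. MatCreateSubMatrix_MPIAIJ_SameRowColDist() above assembles its result this way. */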
3488 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3489 {
3490   PetscErrorCode ierr;
3491   Mat_MPIAIJ     *maij;
3492   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3493   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3494   PetscScalar    *oa=b->a;
3495   Mat            Bnew;
3496   PetscInt       m,n,N;
3497 
3498   PetscFunctionBegin;
3499   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3500   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3501   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3502   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3503   /* The check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3504   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3505 
3506   /* Get global columns of mat */
3507   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3508 
3509   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3510   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3511   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3512   maij = (Mat_MPIAIJ*)(*mat)->data;
3513 
3514   (*mat)->preallocated = PETSC_TRUE;
3515 
3516   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3517   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3518 
3519   /* Set A as diagonal portion of *mat */
3520   maij->A = A;
3521 
3522   nz = oi[m];
3523   for (i=0; i<nz; i++) {
3524     col   = oj[i];
3525     oj[i] = garray[col];
3526   }
3527 
3528    /* Set Bnew as off-diagonal portion of *mat */
3529   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3530   bnew        = (Mat_SeqAIJ*)Bnew->data;
3531   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3532   maij->B     = Bnew;
3533 
3534   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3535 
3536   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3537   b->free_a       = PETSC_FALSE;
3538   b->free_ij      = PETSC_FALSE;
3539   ierr = MatDestroy(&B);CHKERRQ(ierr);
3540 
3541   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3542   bnew->free_a       = PETSC_TRUE;
3543   bnew->free_ij      = PETSC_TRUE;
3544 
3545   /* condense columns of maij->B */
3546   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3547   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3548   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3549   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3550   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3551   PetscFunctionReturn(0);
3552 }
3553 
3554 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3555 
3556 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3557 {
3558   PetscErrorCode ierr;
3559   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3560   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3561   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3562   Mat            M,Msub,B=a->B;
3563   MatScalar      *aa;
3564   Mat_SeqAIJ     *aij;
3565   PetscInt       *garray = a->garray,*colsub,Ncols;
3566   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3567   IS             iscol_sub,iscmap;
3568   const PetscInt *is_idx,*cmap;
3569   PetscBool      allcolumns=PETSC_FALSE;
3570   MPI_Comm       comm;
3571 
3572   PetscFunctionBegin;
3573   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3574 
3575   if (call == MAT_REUSE_MATRIX) {
3576     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3577     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3578     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3579 
3580     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3581     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3582 
3583     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3584     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3585 
3586     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3587 
3588   } else { /* call == MAT_INITIAL_MATRIX */
3589     PetscBool flg;
3590 
3591     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3592     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3593 
3594     /* (1) iscol -> nonscalable iscol_local */
3595     /* Check for special case: each processor gets entire matrix columns */
3596     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3597     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3598     if (allcolumns) {
3599       iscol_sub = iscol_local;
3600       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3601       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3602 
3603     } else {
3604       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3605       PetscInt *idx,*cmap1,k;
3606       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3607       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3608       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3609       count = 0;
3610       k     = 0;
3611       for (i=0; i<Ncols; i++) {
3612         j = is_idx[i];
3613         if (j >= cstart && j < cend) {
3614           /* diagonal part of mat */
3615           idx[count]     = j;
3616           cmap1[count++] = i; /* column index in submat */
3617         } else if (Bn) {
3618           /* off-diagonal part of mat */
3619           if (j == garray[k]) {
3620             idx[count]     = j;
3621             cmap1[count++] = i;  /* column index in submat */
3622           } else if (j > garray[k]) {
3623             while (j > garray[k] && k < Bn-1) k++;
3624             if (j == garray[k]) {
3625               idx[count]     = j;
3626               cmap1[count++] = i; /* column index in submat */
3627             }
3628           }
3629         }
3630       }
3631       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3632 
3633       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3634       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3635       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3636 
3637       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3638     }
3639 
3640     /* (3) Create sequential Msub */
3641     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3642   }
3643 
3644   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3645   aij  = (Mat_SeqAIJ*)(Msub)->data;
3646   ii   = aij->i;
3647   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3648 
3649   /*
3650       m - number of local rows
3651       Ncols - number of columns (same on all processors)
3652       rstart - first row in new global matrix generated
3653   */
3654   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3655 
3656   if (call == MAT_INITIAL_MATRIX) {
3657     /* (4) Create parallel newmat */
3658     PetscMPIInt    rank,size;
3659     PetscInt       csize;
3660 
3661     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3662     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3663 
3664     /*
3665         Determine the number of non-zeros in the diagonal and off-diagonal
3666         portions of the matrix in order to do correct preallocation
3667     */
3668 
3669     /* first get start and end of "diagonal" columns */
3670     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3671     if (csize == PETSC_DECIDE) {
3672       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3673       if (mglobal == Ncols) { /* square matrix */
3674         nlocal = m;
3675       } else {
3676         nlocal = Ncols/size + ((Ncols % size) > rank);
3677       }
3678     } else {
3679       nlocal = csize;
3680     }
3681     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3682     rstart = rend - nlocal;
3683     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3684 
3685     /* next, compute all the lengths */
3686     jj    = aij->j;
3687     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3688     olens = dlens + m;
3689     for (i=0; i<m; i++) {
3690       jend = ii[i+1] - ii[i];
3691       olen = 0;
3692       dlen = 0;
3693       for (j=0; j<jend; j++) {
3694         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3695         else dlen++;
3696         jj++;
3697       }
3698       olens[i] = olen;
3699       dlens[i] = dlen;
3700     }
3701 
3702     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3703     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3704 
3705     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3706     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3707     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3708     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3709     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3710     ierr = PetscFree(dlens);CHKERRQ(ierr);
3711 
3712   } else { /* call == MAT_REUSE_MATRIX */
3713     M    = *newmat;
3714     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3715     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3716     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3717     /*
3718          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3719        rather than the slower MatSetValues().
3720     */
3721     M->was_assembled = PETSC_TRUE;
3722     M->assembled     = PETSC_FALSE;
3723   }
3724 
3725   /* (5) Set values of Msub to *newmat */
3726   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3727   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3728 
3729   jj   = aij->j;
3730   aa   = aij->a;
3731   for (i=0; i<m; i++) {
3732     row = rstart + i;
3733     nz  = ii[i+1] - ii[i];
3734     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3735     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3736     jj += nz; aa += nz;
3737   }
3738   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3739 
3740   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742 
3743   ierr = PetscFree(colsub);CHKERRQ(ierr);
3744 
3745   /* save Msub, iscol_sub and iscmap used in processor for next request */
3746   if (call ==  MAT_INITIAL_MATRIX) {
3747     *newmat = M;
3748     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3749     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3750 
3751     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3752     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3753 
3754     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3755     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3756 
3757     if (iscol_local) {
3758       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3759       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3760     }
3761   }
3762   PetscFunctionReturn(0);
3763 }
3764 
3765 /*
3766     Not great since it makes two copies of the submatrix: first a local SeqAIJ copy on each
3767   process, and then the end result obtained by concatenating the local matrices.
3768   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3769 
3770   Note: This requires a sequential iscol with all indices.
3771 */
3772 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3773 {
3774   PetscErrorCode ierr;
3775   PetscMPIInt    rank,size;
3776   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3777   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3778   Mat            M,Mreuse;
3779   MatScalar      *aa,*vwork;
3780   MPI_Comm       comm;
3781   Mat_SeqAIJ     *aij;
3782   PetscBool      colflag,allcolumns=PETSC_FALSE;
3783 
3784   PetscFunctionBegin;
3785   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3786   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3787   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3788 
3789   /* Check for special case: each processor gets entire matrix columns */
3790   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3791   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3792   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3793 
3794   if (call ==  MAT_REUSE_MATRIX) {
3795     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3796     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3797     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3798   } else {
3799     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3800   }
3801 
3802   /*
3803       m - number of local rows
3804       n - number of columns (same on all processors)
3805       rstart - first row in new global matrix generated
3806   */
3807   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3808   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3809   if (call == MAT_INITIAL_MATRIX) {
3810     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3811     ii  = aij->i;
3812     jj  = aij->j;
3813 
3814     /*
3815         Determine the number of non-zeros in the diagonal and off-diagonal
3816         portions of the matrix in order to do correct preallocation
3817     */
3818 
3819     /* first get start and end of "diagonal" columns */
3820     if (csize == PETSC_DECIDE) {
3821       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3822       if (mglobal == n) { /* square matrix */
3823         nlocal = m;
3824       } else {
3825         nlocal = n/size + ((n % size) > rank);
3826       }
3827     } else {
3828       nlocal = csize;
3829     }
3830     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3831     rstart = rend - nlocal;
3832     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3833 
3834     /* next, compute all the lengths */
3835     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3836     olens = dlens + m;
3837     for (i=0; i<m; i++) {
3838       jend = ii[i+1] - ii[i];
3839       olen = 0;
3840       dlen = 0;
3841       for (j=0; j<jend; j++) {
3842         if (*jj < rstart || *jj >= rend) olen++;
3843         else dlen++;
3844         jj++;
3845       }
3846       olens[i] = olen;
3847       dlens[i] = dlen;
3848     }
3849     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3850     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3851     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3852     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3853     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3854     ierr = PetscFree(dlens);CHKERRQ(ierr);
3855   } else {
3856     PetscInt ml,nl;
3857 
3858     M    = *newmat;
3859     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3860     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3861     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3862     /*
3863          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3864        rather than the slower MatSetValues().
3865     */
3866     M->was_assembled = PETSC_TRUE;
3867     M->assembled     = PETSC_FALSE;
3868   }
3869   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3870   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3871   ii   = aij->i;
3872   jj   = aij->j;
3873   aa   = aij->a;
3874   for (i=0; i<m; i++) {
3875     row   = rstart + i;
3876     nz    = ii[i+1] - ii[i];
3877     cwork = jj;     jj += nz;
3878     vwork = aa;     aa += nz;
3879     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3880   }
3881 
3882   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3883   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884   *newmat = M;
3885 
3886   /* save submatrix used in processor for next request */
3887   if (call ==  MAT_INITIAL_MATRIX) {
3888     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3889     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3890   }
3891   PetscFunctionReturn(0);
3892 }
3893 
3894 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3895 {
3896   PetscInt       m,cstart, cend,j,nnz,i,d;
3897   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3898   const PetscInt *JJ;
3899   PetscScalar    *values;
3900   PetscErrorCode ierr;
3901   PetscBool      nooffprocentries;
3902 
3903   PetscFunctionBegin;
3904   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3905 
3906   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3907   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3908   m      = B->rmap->n;
3909   cstart = B->cmap->rstart;
3910   cend   = B->cmap->rend;
3911   rstart = B->rmap->rstart;
3912 
3913   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3914 
3915 #if defined(PETSC_USE_DEBUG)
3916   for (i=0; i<m && Ii; i++) {
3917     nnz = Ii[i+1]- Ii[i];
3918     JJ  = J + Ii[i];
3919     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3920     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3921     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3922   }
3923 #endif
3924 
3925   for (i=0; i<m && Ii; i++) {
3926     nnz     = Ii[i+1]- Ii[i];
3927     JJ      = J + Ii[i];
3928     nnz_max = PetscMax(nnz_max,nnz);
3929     d       = 0;
3930     for (j=0; j<nnz; j++) {
3931       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3932     }
3933     d_nnz[i] = d;
3934     o_nnz[i] = nnz - d;
3935   }
3936   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3937   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3938 
3939   if (v) values = (PetscScalar*)v;
3940   else {
3941     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3942   }
3943 
3944   for (i=0; i<m && Ii; i++) {
3945     ii   = i + rstart;
3946     nnz  = Ii[i+1]- Ii[i];
3947     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3948   }
3949   nooffprocentries    = B->nooffprocentries;
3950   B->nooffprocentries = PETSC_TRUE;
3951   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3952   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3953   B->nooffprocentries = nooffprocentries;
3954 
3955   if (!v) {
3956     ierr = PetscFree(values);CHKERRQ(ierr);
3957   }
3958   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3959   PetscFunctionReturn(0);
3960 }
3961 
3962 /*@
3963    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3964    (the default parallel PETSc format).
3965 
3966    Collective on MPI_Comm
3967 
3968    Input Parameters:
3969 +  B - the matrix
3970 .  i - the indices into j for the start of each local row (starts with zero)
3971 .  j - the column indices for each local row (starts with zero)
3972 -  v - optional values in the matrix
3973 
3974    Level: developer
3975 
3976    Notes:
3977        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3978      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3979      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3980 
3981        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3982 
3983        The format used for the sparse matrix input is equivalent to a
3984     row-major ordering, i.e., for the following matrix, the input data expected is
3985     as shown:
3986 
3987 $        1 0 0
3988 $        2 0 3     P0
3989 $       -------
3990 $        4 5 6     P1
3991 $
3992 $     Process0 [P0]: rows_owned=[0,1]
3993 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3994 $        j =  {0,0,2}  [size = 3]
3995 $        v =  {1,2,3}  [size = 3]
3996 $
3997 $     Process1 [P1]: rows_owned=[2]
3998 $        i =  {0,3}    [size = nrow+1  = 1+1]
3999 $        j =  {0,1,2}  [size = 3]
4000 $        v =  {4,5,6}  [size = 3]
4001 
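       As a minimal sketch (error checking omitted; the array names pi, pj, pv are illustrative),
     process 0 in the layout above could supply its local CSR data to an already created
     MATMPIAIJ matrix B that owns 2 local rows:

$     PetscInt    pi[] = {0,1,3}, pj[] = {0,0,2};
$     PetscScalar pv[] = {1.0,2.0,3.0};
$     ierr = MatMPIAIJSetPreallocationCSR(B,pi,pj,pv);CHKERRQ(ierr);

       Process 1 would make the same call with its own i = {0,3}, j = {0,1,2}, v = {4,5,6}.
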
4002 .keywords: matrix, aij, compressed row, sparse, parallel
4003 
4004 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4005           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4006 @*/
4007 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4008 {
4009   PetscErrorCode ierr;
4010 
4011   PetscFunctionBegin;
4012   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4013   PetscFunctionReturn(0);
4014 }
4015 
4016 /*@C
4017    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4018    (the default parallel PETSc format).  For good matrix assembly performance
4019    the user should preallocate the matrix storage by setting the parameters
4020    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4021    performance can be increased by more than a factor of 50.
4022 
4023    Collective on MPI_Comm
4024 
4025    Input Parameters:
4026 +  B - the matrix
4027 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4028            (same value is used for all local rows)
4029 .  d_nnz - array containing the number of nonzeros in the various rows of the
4030            DIAGONAL portion of the local submatrix (possibly different for each row)
4031            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4032            The size of this array is equal to the number of local rows, i.e 'm'.
4033            For matrices that will be factored, you must leave room for (and set)
4034            the diagonal entry even if it is zero.
4035 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4036            submatrix (same value is used for all local rows).
4037 -  o_nnz - array containing the number of nonzeros in the various rows of the
4038            OFF-DIAGONAL portion of the local submatrix (possibly different for
4039            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4040            structure. The size of this array is equal to the number
4041            of local rows, i.e 'm'.
4042 
4043    If the *_nnz parameter is given then the *_nz parameter is ignored
4044 
4045    The AIJ format (also called the Yale sparse matrix format or
4046    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4047    storage.  The stored row and column indices begin with zero.
4048    See Users-Manual: ch_mat for details.
4049 
4050    The parallel matrix is partitioned such that the first m0 rows belong to
4051    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4052    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
4053 
4054    The DIAGONAL portion of the local submatrix of a processor can be defined
4055    as the submatrix which is obtained by extracting the part corresponding to
4056    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4057    first row that belongs to the processor, r2 is the last row belonging to
4058    this processor, and c1-c2 is the range of indices of the local part of a
4059    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4060    common case of a square matrix, the row and column ranges are the same and
4061    the DIAGONAL part is also square. The remaining portion of the local
4062    submatrix (m x N) constitutes the OFF-DIAGONAL portion.
4063 
4064    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4065 
4066    You can call MatGetInfo() to get information on how effective the preallocation was;
4067    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4068    You can also run with the option -info and look for messages with the string
4069    malloc in them to see if additional memory allocation was needed.
4070 
4071    Example usage:
4072 
4073    Consider the following 8x8 matrix with 34 non-zero values, that is
4074    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4075    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4076    as follows:
4077 
4078 .vb
4079             1  2  0  |  0  3  0  |  0  4
4080     Proc0   0  5  6  |  7  0  0  |  8  0
4081             9  0 10  | 11  0  0  | 12  0
4082     -------------------------------------
4083            13  0 14  | 15 16 17  |  0  0
4084     Proc1   0 18  0  | 19 20 21  |  0  0
4085             0  0  0  | 22 23  0  | 24  0
4086     -------------------------------------
4087     Proc2  25 26 27  |  0  0 28  | 29  0
4088            30  0  0  | 31 32 33  |  0 34
4089 .ve
4090 
4091    This can be represented as a collection of submatrices as:
4092 
4093 .vb
4094       A B C
4095       D E F
4096       G H I
4097 .ve
4098 
4099    Where the submatrices A,B,C are owned by proc0, D,E,F are
4100    owned by proc1, G,H,I are owned by proc2.
4101 
4102    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4103    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104    The 'M','N' parameters are 8,8, and have the same values on all procs.
4105 
4106    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4107    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4108    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4109    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4110    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4111    matrix, and [DF] as another SeqAIJ matrix.
4112 
4113    When d_nz, o_nz parameters are specified, d_nz storage elements are
4114    allocated for every row of the local diagonal submatrix, and o_nz
4115    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4116    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4117    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4118    In this case, the values of d_nz,o_nz are:
4119 .vb
4120      proc0 : dnz = 2, o_nz = 2
4121      proc1 : dnz = 3, o_nz = 2
4122      proc2 : dnz = 1, o_nz = 4
4123 .ve
4124    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4125    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4126    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4127    34 values.
4128 
4129    When d_nnz, o_nnz parameters are specified, the storage is specified
4130    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4131    In the above case the values for d_nnz,o_nnz are:
4132 .vb
4133      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4134      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4135      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4136 .ve
4137    Here the space allocated is the sum of all the above values, i.e., 34, and
4138    hence pre-allocation is perfect.
4139 
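   As a minimal sketch (error checking omitted; comm and A are assumed to be an MPI communicator
   and a not-yet-created Mat), proc0 in the example above could preallocate its 3 local rows of
   the 8x8 matrix with:

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
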
4140    Level: intermediate
4141 
4142 .keywords: matrix, aij, compressed row, sparse, parallel
4143 
4144 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4145           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4146 @*/
4147 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4148 {
4149   PetscErrorCode ierr;
4150 
4151   PetscFunctionBegin;
4152   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4153   PetscValidType(B,1);
4154   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4155   PetscFunctionReturn(0);
4156 }
4157 
4158 /*@
4159      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4160          in standard CSR format.
4161 
4162    Collective on MPI_Comm
4163 
4164    Input Parameters:
4165 +  comm - MPI communicator
4166 .  m - number of local rows (Cannot be PETSC_DECIDE)
4167 .  n - This value should be the same as the local size used in creating the
4168        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4169        calculated if N is given) For square matrices n is almost always m.
4170 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4171 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4172 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4173 .   j - column indices
4174 -   a - matrix values
4175 
4176    Output Parameter:
4177 .   mat - the matrix
4178 
4179    Level: intermediate
4180 
4181    Notes:
4182        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4183      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4184      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4185 
4186        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4187 
4188        The format used for the sparse matrix input is equivalent to a
4189     row-major ordering, i.e., for the following matrix, the input data expected is
4190     as shown:
4191 
4192 $        1 0 0
4193 $        2 0 3     P0
4194 $       -------
4195 $        4 5 6     P1
4196 $
4197 $     Process0 [P0]: rows_owned=[0,1]
4198 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4199 $        j =  {0,0,2}  [size = 3]
4200 $        v =  {1,2,3}  [size = 3]
4201 $
4202 $     Process1 [P1]: rows_owned=[2]
4203 $        i =  {0,3}    [size = nrow+1  = 1+1]
4204 $        j =  {0,1,2}  [size = 3]
4205 $        v =  {4,5,6}  [size = 3]
4206 
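       As a minimal sketch (error checking omitted; the array names pi, pj, pa are illustrative),
     process 0 in the layout above could create the whole 3x3 parallel matrix from its local
     CSR data only:

$     PetscInt    pi[] = {0,1,3}, pj[] = {0,0,2};
$     PetscScalar pa[] = {1.0,2.0,3.0};
$     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,pi,pj,pa,&A);CHKERRQ(ierr);

       Process 1 makes the matching call with m = 1 and its own i, j, a arrays.
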
4207 .keywords: matrix, aij, compressed row, sparse, parallel
4208 
4209 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4210           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4211 @*/
4212 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4213 {
4214   PetscErrorCode ierr;
4215 
4216   PetscFunctionBegin;
4217   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4218   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4219   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4220   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4221   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4222   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4223   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4224   PetscFunctionReturn(0);
4225 }
4226 
4227 /*@C
4228    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4229    (the default parallel PETSc format).  For good matrix assembly performance
4230    the user should preallocate the matrix storage by setting the parameters
4231    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4232    performance can be increased by more than a factor of 50.
4233 
4234    Collective on MPI_Comm
4235 
4236    Input Parameters:
4237 +  comm - MPI communicator
4238 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4239            This value should be the same as the local size used in creating the
4240            y vector for the matrix-vector product y = Ax.
4241 .  n - This value should be the same as the local size used in creating the
4242        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4243        calculated if N is given) For square matrices n is almost always m.
4244 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4245 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4246 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4247            (same value is used for all local rows)
4248 .  d_nnz - array containing the number of nonzeros in the various rows of the
4249            DIAGONAL portion of the local submatrix (possibly different for each row)
4250            or NULL, if d_nz is used to specify the nonzero structure.
4251            The size of this array is equal to the number of local rows, i.e 'm'.
4252 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4253            submatrix (same value is used for all local rows).
4254 -  o_nnz - array containing the number of nonzeros in the various rows of the
4255            OFF-DIAGONAL portion of the local submatrix (possibly different for
4256            each row) or NULL, if o_nz is used to specify the nonzero
4257            structure. The size of this array is equal to the number
4258            of local rows, i.e 'm'.
4259 
4260    Output Parameter:
4261 .  A - the matrix
4262 
4263    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4264    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4265    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4266 
4267    Notes:
4268    If the *_nnz parameter is given then the *_nz parameter is ignored
4269 
4270    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4271    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4272    storage requirements for this matrix.
4273 
4274    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4275    processor then it must be used on all processors that share the object for
4276    that argument.
4277 
4278    The user MUST specify either the local or global matrix dimensions
4279    (possibly both).
4280 
4281    The parallel matrix is partitioned across processors such that the
4282    first m0 rows belong to process 0, the next m1 rows belong to
4283    process 1, the next m2 rows belong to process 2 etc., where
4284    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4285    values corresponding to an [m x N] submatrix.
4286 
4287    The columns are logically partitioned with the n0 columns belonging
4288    to 0th partition, the next n1 columns belonging to the next
4289    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4290 
4291    The DIAGONAL portion of the local submatrix on any given processor
4292    is the submatrix corresponding to the rows and columns m,n owned by
4293    the given processor, i.e., the diagonal matrix on
4294    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4295    etc. The remaining portion of the local submatrix [m x (N-n)]
4296    constitutes the OFF-DIAGONAL portion. The example below better
4297    illustrates this concept.
4298 
4299    For a square global matrix we define each processor's diagonal portion
4300    to be its local rows and the corresponding columns (a square submatrix);
4301    each processor's off-diagonal portion encompasses the remainder of the
4302    local matrix (a rectangular submatrix).
4303 
4304    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4305 
4306    When calling this routine with a single process communicator, a matrix of
4307    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4308    type of communicator, use the construction mechanism
4309 .vb
4310      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4311 .ve
4317 
4318    By default, this format uses inodes (identical nodes) when possible.
4319    We search for consecutive rows with the same nonzero structure, thereby
4320    reusing matrix information to achieve increased efficiency.
4321 
4322    Options Database Keys:
4323 +  -mat_no_inode  - Do not use inodes
4324 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4325 
4326 
4327 
4328    Example usage:
4329 
4330    Consider the following 8x8 matrix with 34 non-zero values, that is
4331    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4332    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4333    as follows
4334 
4335 .vb
4336             1  2  0  |  0  3  0  |  0  4
4337     Proc0   0  5  6  |  7  0  0  |  8  0
4338             9  0 10  | 11  0  0  | 12  0
4339     -------------------------------------
4340            13  0 14  | 15 16 17  |  0  0
4341     Proc1   0 18  0  | 19 20 21  |  0  0
4342             0  0  0  | 22 23  0  | 24  0
4343     -------------------------------------
4344     Proc2  25 26 27  |  0  0 28  | 29  0
4345            30  0  0  | 31 32 33  |  0 34
4346 .ve
4347 
4348    This can be represented as a collection of submatrices as
4349 
4350 .vb
4351       A B C
4352       D E F
4353       G H I
4354 .ve
4355 
4356    Where the submatrices A,B,C are owned by proc0, D,E,F are
4357    owned by proc1, G,H,I are owned by proc2.
4358 
4359    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4360    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4361    The 'M','N' parameters are 8,8, and have the same values on all procs.
4362 
4363    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4364    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4365    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4366    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4367    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4368    matrix, and [DF] as another SeqAIJ matrix.
4369 
4370    When d_nz, o_nz parameters are specified, d_nz storage elements are
4371    allocated for every row of the local diagonal submatrix, and o_nz
4372    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4373    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4374    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4375    In this case, the values of d_nz,o_nz are
4376 .vb
4377      proc0 : dnz = 2, o_nz = 2
4378      proc1 : dnz = 3, o_nz = 2
4379      proc2 : dnz = 1, o_nz = 4
4380 .ve
4381    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4382    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4383    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4384    34 values.
4385 
4386    When d_nnz, o_nnz parameters are specified, the storage is specified
4387    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4388    In the above case the values for d_nnz,o_nnz are
4389 .vb
4390      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4391      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4392      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4393 .ve
4394    Here the space allocated is the sum of all the above values, i.e., 34, and
4395    hence pre-allocation is perfect.
4396 
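   As a minimal sketch (error checking omitted), proc0 in the example above could create its
   3 local rows of the 8x8 matrix using the single-value preallocation arguments:

.vb
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,2,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve
   For an exact preallocation, pass the d_nnz/o_nnz arrays listed above instead of NULL.
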
4397    Level: intermediate
4398 
4399 .keywords: matrix, aij, compressed row, sparse, parallel
4400 
4401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4402           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4403 @*/
4404 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4405 {
4406   PetscErrorCode ierr;
4407   PetscMPIInt    size;
4408 
4409   PetscFunctionBegin;
4410   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4411   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4412   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4413   if (size > 1) {
4414     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4415     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4416   } else {
4417     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4418     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4419   }
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4424 {
4425   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4426   PetscBool      flg;
4427   PetscErrorCode ierr;
4428 
4429   PetscFunctionBegin;
4430   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4431   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4432   if (Ad)     *Ad     = a->A;
4433   if (Ao)     *Ao     = a->B;
4434   if (colmap) *colmap = a->garray;
4435   PetscFunctionReturn(0);
4436 }
4437 
4438 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4439 {
4440   PetscErrorCode ierr;
4441   PetscInt       m,N,i,rstart,nnz,Ii;
4442   PetscInt       *indx;
4443   PetscScalar    *values;
4444 
4445   PetscFunctionBegin;
4446   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4447   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4448     PetscInt       *dnz,*onz,sum,bs,cbs;
4449 
4450     if (n == PETSC_DECIDE) {
4451       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4452     }
4453     /* Check sum(n) = N */
4454     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4455     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4456 
4457     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4458     rstart -= m;
4459 
4460     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4461     for (i=0; i<m; i++) {
4462       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4463       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4464       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4465     }
4466 
4467     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4468     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4469     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4470     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4471     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4472     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4473     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4474     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4475   }
4476 
4477   /* numeric phase */
4478   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4479   for (i=0; i<m; i++) {
4480     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4481     Ii   = i + rstart;
4482     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4483     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4484   }
4485   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4486   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4487   PetscFunctionReturn(0);
4488 }
4489 
4490 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4491 {
4492   PetscErrorCode    ierr;
4493   PetscMPIInt       rank;
4494   PetscInt          m,N,i,rstart,nnz;
4495   size_t            len;
4496   const PetscInt    *indx;
4497   PetscViewer       out;
4498   char              *name;
4499   Mat               B;
4500   const PetscScalar *values;
4501 
4502   PetscFunctionBegin;
4503   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4504   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4505   /* Should this be the type of the diagonal block of A? */
4506   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4507   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4508   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4509   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4510   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4511   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4512   for (i=0; i<m; i++) {
4513     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4514     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4515     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4516   }
4517   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4518   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4519 
4520   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4521   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4522   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4523   sprintf(name,"%s.%d",outfile,rank);
4524   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4525   ierr = PetscFree(name);CHKERRQ(ierr);
4526   ierr = MatView(B,out);CHKERRQ(ierr);
4527   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4528   ierr = MatDestroy(&B);CHKERRQ(ierr);
4529   PetscFunctionReturn(0);
4530 }
4531 
4532 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4533 {
4534   PetscErrorCode      ierr;
4535   Mat_Merge_SeqsToMPI *merge;
4536   PetscContainer      container;
4537 
4538   PetscFunctionBegin;
4539   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4540   if (container) {
4541     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4542     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4543     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4554     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4555     ierr = PetscFree(merge);CHKERRQ(ierr);
4556     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4557   }
4558   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4559   PetscFunctionReturn(0);
4560 }
4561 
4562 #include <../src/mat/utils/freespace.h>
4563 #include <petscbt.h>
4564 
4565 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4566 {
4567   PetscErrorCode      ierr;
4568   MPI_Comm            comm;
4569   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4570   PetscMPIInt         size,rank,taga,*len_s;
4571   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4572   PetscInt            proc,m;
4573   PetscInt            **buf_ri,**buf_rj;
4574   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4575   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4576   MPI_Request         *s_waits,*r_waits;
4577   MPI_Status          *status;
4578   MatScalar           *aa=a->a;
4579   MatScalar           **abuf_r,*ba_i;
4580   Mat_Merge_SeqsToMPI *merge;
4581   PetscContainer      container;
4582 
4583   PetscFunctionBegin;
4584   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4585   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4586 
4587   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4588   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4589 
4590   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4591   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4592 
4593   bi     = merge->bi;
4594   bj     = merge->bj;
4595   buf_ri = merge->buf_ri;
4596   buf_rj = merge->buf_rj;
4597 
4598   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4599   owners = merge->rowmap->range;
4600   len_s  = merge->len_s;
4601 
4602   /* send and recv matrix values */
4603   /*-----------------------------*/
4604   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4605   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4606 
4607   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4608   for (proc=0,k=0; proc<size; proc++) {
4609     if (!len_s[proc]) continue;
4610     i    = owners[proc];
4611     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4612     k++;
4613   }
4614 
4615   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4616   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4617   ierr = PetscFree(status);CHKERRQ(ierr);
4618 
4619   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4620   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4621 
4622   /* insert mat values of mpimat */
4623   /*----------------------------*/
4624   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4625   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4626 
4627   for (k=0; k<merge->nrecv; k++) {
4628     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4629     nrows       = *(buf_ri_k[k]);
4630     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4631     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4632   }
4633 
4634   /* set values of ba */
4635   m = merge->rowmap->n;
4636   for (i=0; i<m; i++) {
4637     arow = owners[rank] + i;
4638     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4639     bnzi = bi[i+1] - bi[i];
4640     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4641 
4642     /* add local non-zero vals of this proc's seqmat into ba */
4643     anzi   = ai[arow+1] - ai[arow];
4644     aj     = a->j + ai[arow];
4645     aa     = a->a + ai[arow];
4646     nextaj = 0;
4647     for (j=0; nextaj<anzi; j++) {
4648       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4649         ba_i[j] += aa[nextaj++];
4650       }
4651     }
4652 
4653     /* add received vals into ba */
4654     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4655       /* i-th row */
4656       if (i == *nextrow[k]) {
4657         anzi   = *(nextai[k]+1) - *nextai[k];
4658         aj     = buf_rj[k] + *(nextai[k]);
4659         aa     = abuf_r[k] + *(nextai[k]);
4660         nextaj = 0;
4661         for (j=0; nextaj<anzi; j++) {
4662           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4663             ba_i[j] += aa[nextaj++];
4664           }
4665         }
4666         nextrow[k]++; nextai[k]++;
4667       }
4668     }
4669     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4670   }
4671   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4672   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4673 
4674   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4675   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4676   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4677   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4678   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4679   PetscFunctionReturn(0);
4680 }
4681 
4682 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4683 {
4684   PetscErrorCode      ierr;
4685   Mat                 B_mpi;
4686   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4687   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4688   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4689   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4690   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4691   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4692   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4693   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4694   MPI_Status          *status;
4695   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4696   PetscBT             lnkbt;
4697   Mat_Merge_SeqsToMPI *merge;
4698   PetscContainer      container;
4699 
4700   PetscFunctionBegin;
4701   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4702 
4703   /* make sure it is a PETSc comm */
4704   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4705   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4706   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4707 
4708   ierr = PetscNew(&merge);CHKERRQ(ierr);
4709   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4710 
4711   /* determine row ownership */
4712   /*---------------------------------------------------------*/
4713   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4714   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4715   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4716   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4717   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4718   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4719   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4720 
4721   m      = merge->rowmap->n;
4722   owners = merge->rowmap->range;
4723 
4724   /* determine the number of messages to send, their lengths */
4725   /*---------------------------------------------------------*/
4726   len_s = merge->len_s;
4727 
4728   len          = 0; /* length of buf_si[] */
4729   merge->nsend = 0;
4730   for (proc=0; proc<size; proc++) {
4731     len_si[proc] = 0;
4732     if (proc == rank) {
4733       len_s[proc] = 0;
4734     } else {
4735       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4736       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4737     }
4738     if (len_s[proc]) {
4739       merge->nsend++;
4740       nrows = 0;
4741       for (i=owners[proc]; i<owners[proc+1]; i++) {
4742         if (ai[i+1] > ai[i]) nrows++;
4743       }
4744       len_si[proc] = 2*(nrows+1);
4745       len         += len_si[proc];
4746     }
4747   }
4748 
4749   /* determine the number and length of messages to receive for ij-structure */
4750   /*-------------------------------------------------------------------------*/
4751   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4752   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4753 
4754   /* post the Irecv of j-structure */
4755   /*-------------------------------*/
4756   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4757   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4758 
4759   /* post the Isend of j-structure */
4760   /*--------------------------------*/
4761   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4762 
4763   for (proc=0, k=0; proc<size; proc++) {
4764     if (!len_s[proc]) continue;
4765     i    = owners[proc];
4766     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4767     k++;
4768   }
4769 
4770   /* receives and sends of j-structure are complete */
4771   /*------------------------------------------------*/
4772   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4773   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4774 
4775   /* send and recv i-structure */
4776   /*---------------------------*/
4777   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4778   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4779 
4780   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4781   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4782   for (proc=0,k=0; proc<size; proc++) {
4783     if (!len_s[proc]) continue;
4784     /* form outgoing message for i-structure:
4785          buf_si[0]:                 nrows to be sent
4786                [1:nrows]:           row index (global)
4787                [nrows+1:2*nrows+1]: i-structure index
4788     */
4789     /*-------------------------------------------*/
4790     nrows       = len_si[proc]/2 - 1;
4791     buf_si_i    = buf_si + nrows+1;
4792     buf_si[0]   = nrows;
4793     buf_si_i[0] = 0;
4794     nrows       = 0;
4795     for (i=owners[proc]; i<owners[proc+1]; i++) {
4796       anzi = ai[i+1] - ai[i];
4797       if (anzi) {
4798         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4799         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4800         nrows++;
4801       }
4802     }
4803     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4804     k++;
4805     buf_si += len_si[proc];
4806   }
4807 
4808   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4809   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4810 
4811   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4812   for (i=0; i<merge->nrecv; i++) {
4813     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4814   }
4815 
4816   ierr = PetscFree(len_si);CHKERRQ(ierr);
4817   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4818   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4819   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4820   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4821   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4822   ierr = PetscFree(status);CHKERRQ(ierr);
4823 
4824   /* compute a local seq matrix in each processor */
4825   /*----------------------------------------------*/
4826   /* allocate bi array and free space for accumulating nonzero column info */
4827   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4828   bi[0] = 0;
4829 
4830   /* create and initialize a linked list */
4831   nlnk = N+1;
4832   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4833 
4834   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4835   len  = ai[owners[rank+1]] - ai[owners[rank]];
4836   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4837 
4838   current_space = free_space;
4839 
4840   /* determine symbolic info for each local row */
4841   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4842 
4843   for (k=0; k<merge->nrecv; k++) {
4844     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4845     nrows       = *buf_ri_k[k];
4846     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4847     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4848   }
4849 
4850   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4851   len  = 0;
4852   for (i=0; i<m; i++) {
4853     bnzi = 0;
4854     /* add local non-zero cols of this proc's seqmat into lnk */
4855     arow  = owners[rank] + i;
4856     anzi  = ai[arow+1] - ai[arow];
4857     aj    = a->j + ai[arow];
4858     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4859     bnzi += nlnk;
4860     /* add received col data into lnk */
4861     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4862       if (i == *nextrow[k]) { /* i-th row */
4863         anzi  = *(nextai[k]+1) - *nextai[k];
4864         aj    = buf_rj[k] + *nextai[k];
4865         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4866         bnzi += nlnk;
4867         nextrow[k]++; nextai[k]++;
4868       }
4869     }
4870     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4871 
4872     /* if free space is not available, make more free space */
4873     if (current_space->local_remaining<bnzi) {
4874       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4875       nspacedouble++;
4876     }
4877     /* copy data into free space, then initialize lnk */
4878     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4879     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4880 
4881     current_space->array           += bnzi;
4882     current_space->local_used      += bnzi;
4883     current_space->local_remaining -= bnzi;
4884 
4885     bi[i+1] = bi[i] + bnzi;
4886   }
4887 
4888   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4889 
4890   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4891   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4892   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4893 
4894   /* create symbolic parallel matrix B_mpi */
4895   /*---------------------------------------*/
4896   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4897   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4898   if (n==PETSC_DECIDE) {
4899     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4900   } else {
4901     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4902   }
4903   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4904   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4905   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4906   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4907   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4908 
4909   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4910   B_mpi->assembled    = PETSC_FALSE;
4911   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4912   merge->bi           = bi;
4913   merge->bj           = bj;
4914   merge->buf_ri       = buf_ri;
4915   merge->buf_rj       = buf_rj;
4916   merge->coi          = NULL;
4917   merge->coj          = NULL;
4918   merge->owners_co    = NULL;
4919 
4920   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4921 
4922   /* attach the supporting struct to B_mpi for reuse */
4923   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4924   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4925   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4926   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4927   *mpimat = B_mpi;
4928 
4929   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 /*@C
4934       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4935                  matrices from each processor
4936 
4937     Collective on MPI_Comm
4938 
4939    Input Parameters:
4940 +    comm - the communicator the parallel matrix will live on
4941 .    seqmat - the input sequential matrix
4942 .    m - number of local rows (or PETSC_DECIDE)
4943 .    n - number of local columns (or PETSC_DECIDE)
4944 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4945 
4946    Output Parameter:
4947 .    mpimat - the parallel matrix generated
4948 
4949     Level: advanced
4950 
4951    Notes:
4952      The dimensions of the sequential matrix in each processor MUST be the same.
4953      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4954      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
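
     A minimal usage sketch (error checking and the assembly of seqmat omitted), assuming each
     process has already built a SeqAIJ matrix seqmat of identical dimensions:

.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... refill seqmat with new values, keeping the same nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve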
4955 @*/
4956 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4957 {
4958   PetscErrorCode ierr;
4959   PetscMPIInt    size;
4960 
4961   PetscFunctionBegin;
4962   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4963   if (size == 1) {
4964     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4965     if (scall == MAT_INITIAL_MATRIX) {
4966       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4967     } else {
4968       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4969     }
4970     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4971     PetscFunctionReturn(0);
4972   }
4973   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4974   if (scall == MAT_INITIAL_MATRIX) {
4975     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4976   }
4977   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4978   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4979   PetscFunctionReturn(0);
4980 }
4981 
4982 /*@
4983      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4984           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4985           with MatGetSize().
4986 
4987     Not Collective
4988 
4989    Input Parameters:
4990 +    A - the matrix
4991 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4992 
4993    Output Parameter:
4994 .    A_loc - the local sequential matrix generated
4995 
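    A minimal usage sketch (error checking omitted); with MAT_INITIAL_MATRIX the returned
    matrix is owned by the caller and can be destroyed with MatDestroy() when no longer needed:

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc as an ordinary SeqAIJ matrix ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
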
4996     Level: developer
4997 
4998 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4999 
5000 @*/
5001 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5002 {
5003   PetscErrorCode ierr;
5004   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5005   Mat_SeqAIJ     *mat,*a,*b;
5006   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5007   MatScalar      *aa,*ba,*cam;
5008   PetscScalar    *ca;
5009   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5010   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5011   PetscBool      match;
5012   MPI_Comm       comm;
5013   PetscMPIInt    size;
5014 
5015   PetscFunctionBegin;
5016   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5017   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5018   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5020   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5021 
5022   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5023   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5024   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5025   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5026   aa = a->a; ba = b->a;
5027   if (scall == MAT_INITIAL_MATRIX) {
5028     if (size == 1) {
5029       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5030       PetscFunctionReturn(0);
5031     }
5032 
5033     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5034     ci[0] = 0;
5035     for (i=0; i<am; i++) {
5036       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5037     }
5038     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5039     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5040     k    = 0;
5041     for (i=0; i<am; i++) {
5042       ncols_o = bi[i+1] - bi[i];
5043       ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: columns before the diagonal block */
5045       for (jo=0; jo<ncols_o; jo++) {
5046         col = cmap[*bj];
5047         if (col >= cstart) break;
5048         cj[k]   = col; bj++;
5049         ca[k++] = *ba++;
5050       }
5051       /* diagonal portion of A */
5052       for (j=0; j<ncols_d; j++) {
5053         cj[k]   = cstart + *aj++;
5054         ca[k++] = *aa++;
5055       }
      /* off-diagonal portion of A: columns after the diagonal block */
5057       for (j=jo; j<ncols_o; j++) {
5058         cj[k]   = cmap[*bj++];
5059         ca[k++] = *ba++;
5060       }
5061     }
5062     /* put together the new matrix */
5063     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5064     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5065     /* Since these are PETSc arrays, change flags to free them as necessary. */
5066     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5067     mat->free_a  = PETSC_TRUE;
5068     mat->free_ij = PETSC_TRUE;
5069     mat->nonew   = 0;
5070   } else if (scall == MAT_REUSE_MATRIX) {
5071     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5072     ci = mat->i; cj = mat->j; cam = mat->a;
5073     for (i=0; i<am; i++) {
      /* off-diagonal portion of A: columns before the diagonal block */
5075       ncols_o = bi[i+1] - bi[i];
5076       for (jo=0; jo<ncols_o; jo++) {
5077         col = cmap[*bj];
5078         if (col >= cstart) break;
5079         *cam++ = *ba++; bj++;
5080       }
5081       /* diagonal portion of A */
5082       ncols_d = ai[i+1] - ai[i];
5083       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A: columns after the diagonal block */
5085       for (j=jo; j<ncols_o; j++) {
5086         *cam++ = *ba++; bj++;
5087       }
5088     }
5089   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5090   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
5093 
5094 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all of its local rows and its NON-ZERO columns
5096 
5097     Not Collective
5098 
5099    Input Parameters:
5100 +    A - the matrix
5101 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5102 -    row, col - index sets of rows and columns to extract (or NULL)
5103 
5104    Output Parameter:
5105 .    A_loc - the local sequential matrix generated
5106 
5107     Level: developer
5108 
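   Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix; passing NULL for
   row and col selects all local rows and the nonzero columns):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
   /* ... use A_loc ... */
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
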
5109 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5110 
5111 @*/
5112 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5113 {
5114   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5115   PetscErrorCode ierr;
5116   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5117   IS             isrowa,iscola;
5118   Mat            *aloc;
5119   PetscBool      match;
5120 
5121   PetscFunctionBegin;
5122   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5123   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5124   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5125   if (!row) {
5126     start = A->rmap->rstart; end = A->rmap->rend;
5127     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5128   } else {
5129     isrowa = *row;
5130   }
5131   if (!col) {
5132     start = A->cmap->rstart;
5133     cmap  = a->garray;
5134     nzA   = a->A->cmap->n;
5135     nzB   = a->B->cmap->n;
5136     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5137     ncols = 0;
5138     for (i=0; i<nzB; i++) {
5139       if (cmap[i] < start) idx[ncols++] = cmap[i];
5140       else break;
5141     }
5142     imark = i;
5143     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5144     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5145     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5146   } else {
5147     iscola = *col;
5148   }
5149   if (scall != MAT_INITIAL_MATRIX) {
5150     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5151     aloc[0] = *A_loc;
5152   }
5153   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5154   if (!col) { /* attach global id of condensed columns */
5155     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5156   }
5157   *A_loc = aloc[0];
5158   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5159   if (!row) {
5160     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5161   }
5162   if (!col) {
5163     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5164   }
5165   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5166   PetscFunctionReturn(0);
5167 }
5168 
5169 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5171 
5172     Collective on Mat
5173 
5174    Input Parameters:
5175 +    A,B - the matrices in mpiaij format
5176 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5177 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5178 
   Output Parameters:
5180 +    rowb, colb - index sets of rows and columns of B to extract
5181 -    B_seq - the sequential matrix generated
5182 
5183     Level: developer
5184 
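   Example usage (a minimal sketch; A and B are assumed to be assembled MATMPIAIJ matrices whose layouts are
   compatible, i.e. the column layout of A matches the row layout of B):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   /* when only the numerical values of B have changed, reuse the index sets and B_seq */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
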
5185 @*/
5186 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5187 {
5188   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5189   PetscErrorCode ierr;
5190   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5191   IS             isrowb,iscolb;
5192   Mat            *bseq=NULL;
5193 
5194   PetscFunctionBegin;
5195   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5196     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5197   }
5198   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5199 
5200   if (scall == MAT_INITIAL_MATRIX) {
5201     start = A->cmap->rstart;
5202     cmap  = a->garray;
5203     nzA   = a->A->cmap->n;
5204     nzB   = a->B->cmap->n;
5205     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5206     ncols = 0;
5207     for (i=0; i<nzB; i++) {  /* row < local row index */
5208       if (cmap[i] < start) idx[ncols++] = cmap[i];
5209       else break;
5210     }
5211     imark = i;
5212     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5213     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5214     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5215     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5216   } else {
5217     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5218     isrowb  = *rowb; iscolb = *colb;
5219     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5220     bseq[0] = *B_seq;
5221   }
5222   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5223   *B_seq = bseq[0];
5224   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5225   if (!rowb) {
5226     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5227   } else {
5228     *rowb = isrowb;
5229   }
5230   if (!colb) {
5231     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5232   } else {
5233     *colb = iscolb;
5234   }
5235   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5236   PetscFunctionReturn(0);
5237 }
5238 
5239 #include <petsc/private/vecscatterimpl.h>
5240 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A
5243 
5244     Collective on Mat
5245 
5246    Input Parameters:
5247 +    A,B - the matrices in mpiaij format
5248 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5249 
   Output Parameters:
5251 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5252 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5253 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5254 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5255 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5258 
5259     Level: developer
5260 
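    Example usage (a minimal sketch of the internal calling pattern; A and B are assumed to be assembled
    MATMPIAIJ matrices with compatible layouts, and the caller keeps startsj_s, startsj_r and bufa between calls):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      /* when only the numerical values of B have changed */
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
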
5261 */
5262 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5263 {
5264   VecScatter_MPI_General *gen_to,*gen_from;
5265   PetscErrorCode         ierr;
5266   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5267   Mat_SeqAIJ             *b_oth;
5268   VecScatter             ctx;
5269   MPI_Comm               comm;
5270   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5271   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5272   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5273   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5274   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5275   MPI_Request            *rwaits = NULL,*swaits = NULL;
5276   MPI_Status             *sstatus,rstatus;
5277   PetscMPIInt            jj,size;
5278   VecScatterType         type;
5279   PetscBool              mpi1;
5280 
5281   PetscFunctionBegin;
5282   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5283   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5284 
5285   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5287   }
5288   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5289   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5290 
5291   if (size == 1) {
5292     startsj_s = NULL;
5293     bufa_ptr  = NULL;
5294     *B_oth    = NULL;
    ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
5296   }
5297 
5298   ctx = a->Mvctx;
5299   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5300   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5301   if (!mpi1) {
    /* a->Mvctx is not of VecScatter type MPI1, and only MPI1 scatters are supported by these Mat-Mat ops,
       so create a->Mvctx_mpi1 */
5304     if (!a->Mvctx_mpi1) {
5305       a->Mvctx_mpi1_flg = PETSC_TRUE;
5306       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5307     }
5308     ctx = a->Mvctx_mpi1;
5309   }
5310   tag = ((PetscObject)ctx)->tag;
5311 
5312   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5313   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5314   nrecvs   = gen_from->n;
5315   nsends   = gen_to->n;
5316 
5317   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5318   srow    = gen_to->indices;    /* local row index to be sent */
5319   sstarts = gen_to->starts;
5320   sprocs  = gen_to->procs;
5321   sstatus = gen_to->sstatus;
5322   sbs     = gen_to->bs;
5323   rstarts = gen_from->starts;
5324   rprocs  = gen_from->procs;
5325   rbs     = gen_from->bs;
5326 
5327   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5328   if (scall == MAT_INITIAL_MATRIX) {
5329     /* i-array */
5330     /*---------*/
5331     /*  post receives */
5332     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5333     for (i=0; i<nrecvs; i++) {
5334       rowlen = rvalues + rstarts[i]*rbs;
5335       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5336       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5337     }
5338 
5339     /* pack the outgoing message */
5340     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5341 
5342     sstartsj[0] = 0;
5343     rstartsj[0] = 0;
5344     len         = 0; /* total length of j or a array to be sent */
5345     k           = 0;
5346     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5347     for (i=0; i<nsends; i++) {
5348       rowlen = svalues + sstarts[i]*sbs;
5349       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5350       for (j=0; j<nrows; j++) {
5351         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5352         for (l=0; l<sbs; l++) {
5353           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5354 
5355           rowlen[j*sbs+l] = ncols;
5356 
5357           len += ncols;
5358           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5359         }
5360         k++;
5361       }
5362       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5363 
5364       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5365     }
5366     /* recvs and sends of i-array are completed */
5367     i = nrecvs;
5368     while (i--) {
5369       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5370     }
5371     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5372     ierr = PetscFree(svalues);CHKERRQ(ierr);
5373 
5374     /* allocate buffers for sending j and a arrays */
5375     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5376     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5377 
5378     /* create i-array of B_oth */
5379     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5380 
5381     b_othi[0] = 0;
5382     len       = 0; /* total length of j or a array to be received */
5383     k         = 0;
5384     for (i=0; i<nrecvs; i++) {
5385       rowlen = rvalues + rstarts[i]*rbs;
5386       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5387       for (j=0; j<nrows; j++) {
5388         b_othi[k+1] = b_othi[k] + rowlen[j];
5389         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5390         k++;
5391       }
5392       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5393     }
5394     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5395 
    /* allocate space for the j and a arrays of B_oth */
5397     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5398     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5399 
5400     /* j-array */
5401     /*---------*/
5402     /*  post receives of j-array */
5403     for (i=0; i<nrecvs; i++) {
5404       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5405       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5406     }
5407 
5408     /* pack the outgoing message j-array */
5409     k = 0;
5410     for (i=0; i<nsends; i++) {
5411       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5412       bufJ  = bufj+sstartsj[i];
5413       for (j=0; j<nrows; j++) {
5414         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5415         for (ll=0; ll<sbs; ll++) {
5416           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5417           for (l=0; l<ncols; l++) {
5418             *bufJ++ = cols[l];
5419           }
5420           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5421         }
5422       }
5423       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5424     }
5425 
5426     /* recvs and sends of j-array are completed */
5427     i = nrecvs;
5428     while (i--) {
5429       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5430     }
5431     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5432   } else if (scall == MAT_REUSE_MATRIX) {
5433     sstartsj = *startsj_s;
5434     rstartsj = *startsj_r;
5435     bufa     = *bufa_ptr;
5436     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5437     b_otha   = b_oth->a;
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5439 
5440   /* a-array */
5441   /*---------*/
5442   /*  post receives of a-array */
5443   for (i=0; i<nrecvs; i++) {
5444     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5445     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5446   }
5447 
5448   /* pack the outgoing message a-array */
5449   k = 0;
5450   for (i=0; i<nsends; i++) {
5451     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5452     bufA  = bufa+sstartsj[i];
5453     for (j=0; j<nrows; j++) {
5454       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5455       for (ll=0; ll<sbs; ll++) {
5456         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5457         for (l=0; l<ncols; l++) {
5458           *bufA++ = vals[l];
5459         }
5460         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5461       }
5462     }
5463     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5464   }
5465   /* recvs and sends of a-array are completed */
5466   i = nrecvs;
5467   while (i--) {
5468     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5469   }
5470   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5471   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5472 
5473   if (scall == MAT_INITIAL_MATRIX) {
5474     /* put together the new matrix */
5475     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5476 
5477     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5478     /* Since these are PETSc arrays, change flags to free them as necessary. */
5479     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5480     b_oth->free_a  = PETSC_TRUE;
5481     b_oth->free_ij = PETSC_TRUE;
5482     b_oth->nonew   = 0;
5483 
5484     ierr = PetscFree(bufj);CHKERRQ(ierr);
5485     if (!startsj_s || !bufa_ptr) {
5486       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
5488     } else {
5489       *startsj_s = sstartsj;
5490       *startsj_r = rstartsj;
5491       *bufa_ptr  = bufa;
5492     }
5493   }
5494   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5495   PetscFunctionReturn(0);
5496 }
5497 
5498 /*@C
5499   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5500 
5501   Not Collective
5502 
  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
5507 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5508 . colmap - A map from global column index to local index into lvec
5509 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5510 
5511   Level: developer
5512 
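  Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix; the type of colmap
  depends on whether PETSc was configured to use PetscTable):
.vb
  Vec        lvec;
  VecScatter scatter;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
.ve
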
5513 @*/
5514 #if defined(PETSC_USE_CTABLE)
5515 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5516 #else
5517 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5518 #endif
5519 {
5520   Mat_MPIAIJ *a;
5521 
5522   PetscFunctionBegin;
5523   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5524   PetscValidPointer(lvec, 2);
5525   PetscValidPointer(colmap, 3);
5526   PetscValidPointer(multScatter, 4);
5527   a = (Mat_MPIAIJ*) A->data;
5528   if (lvec) *lvec = a->lvec;
5529   if (colmap) *colmap = a->colmap;
5530   if (multScatter) *multScatter = a->Mvctx;
5531   PetscFunctionReturn(0);
5532 }
5533 
5534 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5536 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5537 #if defined(PETSC_HAVE_MKL_SPARSE)
5538 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5539 #endif
5540 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5541 #if defined(PETSC_HAVE_ELEMENTAL)
5542 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5543 #endif
5544 #if defined(PETSC_HAVE_HYPRE)
5545 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5546 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5547 #endif
5548 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5549 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5550 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5551 
5552 /*
    Computes C = A*B as (B'*A')', since a direct MPIDense*MPIAIJ product is not available while MPIAIJ*MPIDense is
5554 
5555                n                       p                          p
5556         (              )       (              )         (                  )
5557       m (      A       )  *  n (       B      )   =   m (         C        )
5558         (              )       (              )         (                  )
5559 
5560 */
5561 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5562 {
5563   PetscErrorCode ierr;
5564   Mat            At,Bt,Ct;
5565 
5566   PetscFunctionBegin;
5567   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5568   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5569   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5570   ierr = MatDestroy(&At);CHKERRQ(ierr);
5571   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5572   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5573   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5574   PetscFunctionReturn(0);
5575 }
5576 
5577 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5578 {
5579   PetscErrorCode ierr;
5580   PetscInt       m=A->rmap->n,n=B->cmap->n;
5581   Mat            Cmat;
5582 
5583   PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5585   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5586   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5587   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5588   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5589   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5590   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5591   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5592 
5593   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5594 
5595   *C = Cmat;
5596   PetscFunctionReturn(0);
5597 }
5598 
5599 /* ----------------------------------------------------------------*/
5600 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5601 {
5602   PetscErrorCode ierr;
5603 
5604   PetscFunctionBegin;
5605   if (scall == MAT_INITIAL_MATRIX) {
5606     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5607     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5608     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5609   }
5610   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5611   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5612   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5613   PetscFunctionReturn(0);
5614 }
5615 
5616 /*MC
5617    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5618 
5619    Options Database Keys:
5620 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5621 
5622   Level: beginner
5623 
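  Example usage (a minimal sketch; m and n are local sizes chosen by the caller and the preallocation
  numbers are placeholders):
.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
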
5624 .seealso: MatCreateAIJ()
5625 M*/
5626 
5627 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5628 {
5629   Mat_MPIAIJ     *b;
5630   PetscErrorCode ierr;
5631   PetscMPIInt    size;
5632 
5633   PetscFunctionBegin;
5634   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5635 
5636   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5637   B->data       = (void*)b;
5638   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5639   B->assembled  = PETSC_FALSE;
5640   B->insertmode = NOT_SET_VALUES;
5641   b->size       = size;
5642 
5643   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5644 
5645   /* build cache for off array entries formed */
5646   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5647 
5648   b->donotstash  = PETSC_FALSE;
5649   b->colmap      = 0;
5650   b->garray      = 0;
5651   b->roworiented = PETSC_TRUE;
5652 
5653   /* stuff used for matrix vector multiply */
5654   b->lvec  = NULL;
5655   b->Mvctx = NULL;
5656 
5657   /* stuff for MatGetRow() */
5658   b->rowindices   = 0;
5659   b->rowvalues    = 0;
5660   b->getrowactive = PETSC_FALSE;
5661 
5662   /* flexible pointer used in CUSP/CUSPARSE classes */
5663   b->spptr = NULL;
5664 
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5675 #if defined(PETSC_HAVE_MKL_SPARSE)
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5677 #endif
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5680 #if defined(PETSC_HAVE_ELEMENTAL)
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5682 #endif
5683 #if defined(PETSC_HAVE_HYPRE)
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5685 #endif
5686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5691 #if defined(PETSC_HAVE_HYPRE)
5692   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5693 #endif
5694   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5695   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5696   PetscFunctionReturn(0);
5697 }
5698 
5699 /*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.
5702 
5703    Collective on MPI_Comm
5704 
5705    Input Parameters:
5706 +  comm - MPI communicator
5707 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5713 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5714 .   j - column indices
5715 .   a - matrix values
5716 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5717 .   oj - column indices
5718 -   oa - matrix values
5719 
5720    Output Parameter:
5721 .   mat - the matrix
5722 
5723    Level: advanced
5724 
5725    Notes:
5726        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5727        must free the arrays once the matrix has been destroyed and not before.
5728 
5729        The i and j indices are 0 based
5730 
5731        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5732 
5733        This sets local rows and cannot be used to set off-processor values.
5734 
5735        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5736        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5737        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5738        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5739        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5740        communication if it is known that only local entries will be set.
5741 
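       Example usage (a minimal sketch; the CSR arrays i,j,a for the "diagonal" block and oi,oj,oa for the
       "off-diagonal" block are assumed to have been assembled by the caller with 0-based indices):
.vb
       Mat A;
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       /* ... use A; free i,j,a,oi,oj,oa only after A has been destroyed ... */
       ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
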
5742 .keywords: matrix, aij, compressed row, sparse, parallel
5743 
5744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5745           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5746 @*/
5747 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5748 {
5749   PetscErrorCode ierr;
5750   Mat_MPIAIJ     *maij;
5751 
5752   PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5754   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5755   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5756   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5757   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5758   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5759   maij = (Mat_MPIAIJ*) (*mat)->data;
5760 
5761   (*mat)->preallocated = PETSC_TRUE;
5762 
5763   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5764   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5765 
5766   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5767   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5768 
5769   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5770   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5771   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5772   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773 
5774   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5775   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5776   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5777   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5778   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5779   PetscFunctionReturn(0);
5780 }
5781 
5782 /*
5783     Special version for direct calls from Fortran
5784 */
5785 #include <petsc/private/fortranimpl.h>
5786 
/* Change these macros so they can be used in a void function */
5788 #undef CHKERRQ
5789 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5790 #undef SETERRQ2
5791 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5792 #undef SETERRQ3
5793 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5794 #undef SETERRQ
5795 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5796 
5797 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5798 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5799 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5800 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5801 #else
5802 #endif
5803 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5804 {
5805   Mat            mat  = *mmat;
5806   PetscInt       m    = *mm, n = *mn;
5807   InsertMode     addv = *maddv;
5808   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5809   PetscScalar    value;
5810   PetscErrorCode ierr;
5811 
5812   MatCheckPreallocated(mat,1);
5813   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5814 
5815 #if defined(PETSC_USE_DEBUG)
5816   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5817 #endif
5818   {
5819     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5820     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5821     PetscBool roworiented = aij->roworiented;
5822 
5823     /* Some Variables required in the macro */
5824     Mat        A                 = aij->A;
5825     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5826     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5827     MatScalar  *aa               = a->a;
5828     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5829     Mat        B                 = aij->B;
5830     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5831     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5832     MatScalar  *ba               = b->a;
5833 
5834     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5835     PetscInt  nonew = a->nonew;
5836     MatScalar *ap1,*ap2;
5837 
5838     PetscFunctionBegin;
5839     for (i=0; i<m; i++) {
5840       if (im[i] < 0) continue;
5841 #if defined(PETSC_USE_DEBUG)
5842       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5843 #endif
5844       if (im[i] >= rstart && im[i] < rend) {
5845         row      = im[i] - rstart;
5846         lastcol1 = -1;
5847         rp1      = aj + ai[row];
5848         ap1      = aa + ai[row];
5849         rmax1    = aimax[row];
5850         nrow1    = ailen[row];
5851         low1     = 0;
5852         high1    = nrow1;
5853         lastcol2 = -1;
5854         rp2      = bj + bi[row];
5855         ap2      = ba + bi[row];
5856         rmax2    = bimax[row];
5857         nrow2    = bilen[row];
5858         low2     = 0;
5859         high2    = nrow2;
5860 
5861         for (j=0; j<n; j++) {
5862           if (roworiented) value = v[i*n+j];
5863           else value = v[i+j*m];
5864           if (in[j] >= cstart && in[j] < cend) {
5865             col = in[j] - cstart;
5866             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5867             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5868           } else if (in[j] < 0) continue;
5869 #if defined(PETSC_USE_DEBUG)
5870           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5871           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5872 #endif
5873           else {
5874             if (mat->was_assembled) {
5875               if (!aij->colmap) {
5876                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5877               }
5878 #if defined(PETSC_USE_CTABLE)
5879               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5880               col--;
5881 #else
5882               col = aij->colmap[in[j]] - 1;
5883 #endif
5884               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5885               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5886                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5887                 col  =  in[j];
5888                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5889                 B     = aij->B;
5890                 b     = (Mat_SeqAIJ*)B->data;
5891                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5892                 rp2   = bj + bi[row];
5893                 ap2   = ba + bi[row];
5894                 rmax2 = bimax[row];
5895                 nrow2 = bilen[row];
5896                 low2  = 0;
5897                 high2 = nrow2;
5898                 bm    = aij->B->rmap->n;
5899                 ba    = b->a;
5900               }
5901             } else col = in[j];
5902             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5903           }
5904         }
5905       } else if (!aij->donotstash) {
5906         if (roworiented) {
5907           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5908         } else {
5909           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5910         }
5911       }
5912     }
5913   }
5914   PetscFunctionReturnVoid();
5915 }
5916