xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3d3eaba7093fd3d75f596ec754341dee3aba7588)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
21    enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
26 M*/
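/*
   Usage sketch (not from this file; n and the nonzero counts 5 and 2 are illustrative
   values only): create the matrix as MATAIJ and, as recommended above, call both
   preallocation routines; only the one matching the communicator size takes effect.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/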
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
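/*
   Usage sketch (not from this file): the AIJCRL variant can be requested from the
   options database with -mat_type aijcrl (together with MatSetFromOptions()) or set
   directly in code; A and the nonzero counts below are assumed as in the MATAIJ
   example above.

      ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/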
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
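/*
   Calling sketch (hypothetical names, not taken from this file): gseq is the SeqAIJ
   matrix to distribute and mlocal is the number of rows this process should own.

      Mat dist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are moved over from process 0; the
   nonzero structure created by the MAT_INITIAL_MATRIX call is kept.
*/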
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal entries in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal entries in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each process
402 has an order N integer array) but access is faster.
403 */
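/*
   Lookup sketch, mirroring how the colmap built below is consulted in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ(); gcol and lcol are hypothetical
   names for a global column index and the corresponding local index into the
   off-diagonal block.

   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                          after the decrement, -1 means gcol is not present
   #else
      lcol = aij->colmap[gcol] - 1;    -1 means gcol is not present
   #endif
*/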
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *owners = A->rmap->range;
787   PetscInt       n      = A->rmap->n;
788   PetscSF        sf;
789   PetscInt      *lrows;
790   PetscSFNode   *rrows;
791   PetscInt       r, p = 0, len = 0;
792   PetscErrorCode ierr;
793 
794   PetscFunctionBegin;
795   /* Create SF where leaves are input rows and roots are owned rows */
796   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
797   for (r = 0; r < n; ++r) lrows[r] = -1;
798   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
799   for (r = 0; r < N; ++r) {
800     const PetscInt idx   = rows[r];
801     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
802     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
803       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
804     }
805     if (A->nooffproczerorows) {
806       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
807       lrows[len++] = idx - owners[p];
808     } else {
809       rrows[r].rank = p;
810       rrows[r].index = rows[r] - owners[p];
811     }
812   }
813   if (!A->nooffproczerorows) {
814     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
815     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
816     /* Collect flags for rows to be zeroed */
817     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
818     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
819     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
820     /* Compress and put in row numbers */
821     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
822   }
823   /* fix right hand side if needed */
824   if (x && b) {
825     const PetscScalar *xx;
826     PetscScalar       *bb;
827 
828     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
829     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
830     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
831     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
833   }
834   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
835   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
836   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
837     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
838   } else if (diag != 0.0) {
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
841     for (r = 0; r < len; ++r) {
842       const PetscInt row = lrows[r] + A->rmap->rstart;
843       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
844     }
845     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
846     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
847   } else {
848     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
849   }
850   ierr = PetscFree(lrows);CHKERRQ(ierr);
851 
852   /* only change matrix nonzero state if pattern was allowed to be changed */
853   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
854     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
855     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
865   PetscErrorCode    ierr;
866   PetscMPIInt       n = A->rmap->n;
867   PetscInt          i,j,r,m,p = 0,len = 0;
868   PetscInt          *lrows,*owners = A->rmap->range;
869   PetscSFNode       *rrows;
870   PetscSF           sf;
871   const PetscScalar *xx;
872   PetscScalar       *bb,*mask;
873   Vec               xmask,lmask;
874   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
875   const PetscInt    *aj, *ii,*ridx;
876   PetscScalar       *aa;
877 
878   PetscFunctionBegin;
879   /* Create SF where leaves are input rows and roots are owned rows */
880   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
881   for (r = 0; r < n; ++r) lrows[r] = -1;
882   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
883   for (r = 0; r < N; ++r) {
884     const PetscInt idx   = rows[r];
885     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
886     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
887       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
888     }
889     rrows[r].rank  = p;
890     rrows[r].index = rows[r] - owners[p];
891   }
892   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
893   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
894   /* Collect flags for rows to be zeroed */
895   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
896   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
897   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
898   /* Compress and put in row numbers */
899   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
900   /* zero diagonal part of matrix */
901   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
902   /* handle off diagonal part of matrix */
903   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
904   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
905   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
906   for (i=0; i<len; i++) bb[lrows[i]] = 1;
907   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
908   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
911   if (x) {
912     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
916   }
917   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
918   /* remove zeroed rows of off diagonal matrix */
919   ii = aij->i;
920   for (i=0; i<len; i++) {
921     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
922   }
923   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
924   if (aij->compressedrow.use) {
925     m    = aij->compressedrow.nrows;
926     ii   = aij->compressedrow.i;
927     ridx = aij->compressedrow.rindex;
928     for (i=0; i<m; i++) {
929       n  = ii[i+1] - ii[i];
930       aj = aij->j + ii[i];
931       aa = aij->a + ii[i];
932 
933       for (j=0; j<n; j++) {
934         if (PetscAbsScalar(mask[*aj])) {
935           if (b) bb[*ridx] -= *aa*xx[*aj];
936           *aa = 0.0;
937         }
938         aa++;
939         aj++;
940       }
941       ridx++;
942     }
943   } else { /* do not use compressed row format */
944     m = l->B->rmap->n;
945     for (i=0; i<m; i++) {
946       n  = ii[i+1] - ii[i];
947       aj = aij->j + ii[i];
948       aa = aij->a + ii[i];
949       for (j=0; j<n; j++) {
950         if (PetscAbsScalar(mask[*aj])) {
951           if (b) bb[i] -= *aa*xx[*aj];
952           *aa = 0.0;
953         }
954         aa++;
955         aj++;
956       }
957     }
958   }
959   if (x) {
960     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
961     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
962   }
963   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
964   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
965   ierr = PetscFree(lrows);CHKERRQ(ierr);
966 
967   /* only change matrix nonzero state if pattern was allowed to be changed */
968   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
969     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
970     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatMult_MPIAIJ"
977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981   PetscInt       nt;
982 
983   PetscFunctionBegin;
984   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
985   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
986   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
988   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1008 {
1009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1015   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1023 {
1024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1025   PetscErrorCode ierr;
1026   PetscBool      merged;
1027 
1028   PetscFunctionBegin;
1029   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1030   /* do nondiagonal part */
1031   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1032   if (!merged) {
1033     /* send it on its way */
1034     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1035     /* do local part */
1036     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1037     /* receive remote parts: note this assumes the values are not actually */
1038     /* added into yy until the next line */
1039     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1040   } else {
1041     /* do local part */
1042     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1043     /* send it on its way */
1044     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     /* values actually were received in the Begin() but we need to call this nop */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1053 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1054 {
1055   MPI_Comm       comm;
1056   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1057   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1058   IS             Me,Notme;
1059   PetscErrorCode ierr;
1060   PetscInt       M,N,first,last,*notme,i;
1061   PetscMPIInt    size;
1062 
1063   PetscFunctionBegin;
1064   /* Easy test: symmetric diagonal block */
1065   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1066   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1067   if (!*f) PetscFunctionReturn(0);
1068   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1070   if (size == 1) PetscFunctionReturn(0);
1071 
1072   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1073   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1074   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1075   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1076   for (i=0; i<first; i++) notme[i] = i;
1077   for (i=last; i<M; i++) notme[i-last+first] = i;
1078   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1079   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1080   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1081   Aoff = Aoffs[0];
1082   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1083   Boff = Boffs[0];
1084   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1085   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1086   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1087   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1088   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1089   ierr = PetscFree(notme);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1096 {
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   /* do nondiagonal part */
1102   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1103   /* send it on its way */
1104   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1105   /* do local part */
1106   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1107   /* receive remote parts */
1108   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 /*
1113   This only works correctly for square matrices where the subblock A->A is the
1114    diagonal block
1115 */
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1119 {
1120   PetscErrorCode ierr;
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122 
1123   PetscFunctionBegin;
1124   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1125   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1126   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatScale_MPIAIJ"
1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1139   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatDestroy_MPIAIJ"
1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1146 {
1147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151 #if defined(PETSC_USE_LOG)
1152   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1153 #endif
1154   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1156   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1157   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1158 #if defined(PETSC_USE_CTABLE)
1159   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1160 #else
1161   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1162 #endif
1163   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1164   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1165   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1166   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1167   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1168   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1169 
1170   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1188 {
1189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1190   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1191   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1192   PetscErrorCode ierr;
1193   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1194   int            fd;
1195   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1196   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1197   PetscScalar    *column_values;
1198   PetscInt       message_count,flowcontrolcount;
1199   FILE           *file;
1200 
1201   PetscFunctionBegin;
1202   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1203   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1204   nz   = A->nz + B->nz;
1205   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1206   if (!rank) {
1207     header[0] = MAT_FILE_CLASSID;
1208     header[1] = mat->rmap->N;
1209     header[2] = mat->cmap->N;
1210 
1211     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     /* get largest number of rows any processor has */
1214     rlen  = mat->rmap->n;
1215     range = mat->rmap->range;
1216     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1217   } else {
1218     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     rlen = mat->rmap->n;
1220   }
1221 
1222   /* load up the local row counts */
1223   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1224   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1225 
1226   /* store the row lengths to the file */
1227   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1228   if (!rank) {
1229     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     for (i=1; i<size; i++) {
1231       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1232       rlen = range[i+1] - range[i];
1233       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1235     }
1236     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1237   } else {
1238     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1239     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1241   }
1242   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1243 
1244   /* load up the local column indices */
1245   nzmax = nz; /* process 0 needs as much space as the largest number of nonzeros on any process */
1246   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1248   cnt   = 0;
1249   for (i=0; i<mat->rmap->n; i++) {
1250     for (j=B->i[i]; j<B->i[i+1]; j++) {
1251       if ((col = garray[B->j[j]]) > cstart) break;
1252       column_indices[cnt++] = col;
1253     }
1254     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1255     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1256   }
1257   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1258 
1259   /* store the column indices to the file */
1260   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1261   if (!rank) {
1262     MPI_Status status;
1263     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     for (i=1; i<size; i++) {
1265       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1266       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1267       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1268       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     }
1271     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1272   } else {
1273     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1274     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1277   }
1278   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1279 
1280   /* load up the local column values */
1281   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1282   cnt  = 0;
1283   for (i=0; i<mat->rmap->n; i++) {
1284     for (j=B->i[i]; j<B->i[i+1]; j++) {
1285       if (garray[B->j[j]] > cstart) break;
1286       column_values[cnt++] = B->a[j];
1287     }
1288     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1289     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1290   }
1291   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1292 
1293   /* store the column values to the file */
1294   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1295   if (!rank) {
1296     MPI_Status status;
1297     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1301       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1302       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1304     }
1305     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1306   } else {
1307     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1308     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1311   }
1312   ierr = PetscFree(column_values);CHKERRQ(ierr);
1313 
1314   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1315   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #include <petscdraw.h>
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1323 {
1324   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1325   PetscErrorCode    ierr;
1326   PetscMPIInt       rank = aij->rank,size = aij->size;
1327   PetscBool         isdraw,iascii,isbinary;
1328   PetscViewer       sviewer;
1329   PetscViewerFormat format;
1330 
1331   PetscFunctionBegin;
1332   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1333   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1334   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1335   if (iascii) {
1336     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1337     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1338       MatInfo   info;
1339       PetscBool inodes;
1340 
1341       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1342       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1343       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1344       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1345       if (!inodes) {
1346         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1347                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1348       } else {
1349         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1350                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1351       }
1352       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1354       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1356       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1357       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1359       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1360       PetscFunctionReturn(0);
1361     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1362       PetscInt inodecount,inodelimit,*inodes;
1363       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1364       if (inodes) {
1365         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1366       } else {
1367         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1368       }
1369       PetscFunctionReturn(0);
1370     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1371       PetscFunctionReturn(0);
1372     }
1373   } else if (isbinary) {
1374     if (size == 1) {
1375       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1376       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1377     } else {
1378       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1379     }
1380     PetscFunctionReturn(0);
1381   } else if (isdraw) {
1382     PetscDraw draw;
1383     PetscBool isnull;
1384     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1385     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1386     if (isnull) PetscFunctionReturn(0);
1387   }
1388 
1389   {
1390     /* assemble the entire matrix onto first processor. */
1391     Mat        A;
1392     Mat_SeqAIJ *Aloc;
1393     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1394     MatScalar  *a;
1395 
1396     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1397     if (!rank) {
1398       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1399     } else {
1400       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1401     }
1402     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1403     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1404     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1405     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1406     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1407 
1408     /* copy over the A part */
1409     Aloc = (Mat_SeqAIJ*)aij->A->data;
1410     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1411     row  = mat->rmap->rstart;
1412     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1413     for (i=0; i<m; i++) {
1414       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1415       row++;
1416       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1417     }
1418     aj = Aloc->j;
1419     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1420 
1421     /* copy over the B part */
1422     Aloc = (Mat_SeqAIJ*)aij->B->data;
1423     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1424     row  = mat->rmap->rstart;
1425     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1426     ct   = cols;
1427     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1428     for (i=0; i<m; i++) {
1429       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1430       row++;
1431       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1432     }
1433     ierr = PetscFree(ct);CHKERRQ(ierr);
1434     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436     /*
1437        Everyone has to participate in drawing the matrix since the graphics waits are
1438        synchronized across all processors that share the PetscDraw object
1439     */
1440     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1441     if (!rank) {
1442       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1443       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1444     }
1445     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1446     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1447     ierr = MatDestroy(&A);CHKERRQ(ierr);
1448   }
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatView_MPIAIJ"
1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1455 {
1456   PetscErrorCode ierr;
1457   PetscBool      iascii,isdraw,issocket,isbinary;
1458 
1459   PetscFunctionBegin;
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1462   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1463   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1464   if (iascii || isdraw || isbinary || issocket) {
1465     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1466   }
1467   PetscFunctionReturn(0);
1468 }
1469 
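/*
   MatSOR_MPIAIJ only supports processor-local sweeps: each outer iteration scatters the current
   solution into the ghost vector lvec, forms bb1 = bb - B*lvec with the off-diagonal block B, and
   then applies the sequential SOR kernel of the diagonal block A to bb1.  A true global-ordering
   parallel SOR is not implemented and errors out below.

   A minimal direct-call sketch (assumes an assembled MPIAIJ matrix A and conforming vectors b, x):

       ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   The same path is normally exercised through PCSOR, e.g. -pc_type sor with its local sweep options.
*/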
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatSOR_MPIAIJ"
1472 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1473 {
1474   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1475   PetscErrorCode ierr;
1476   Vec            bb1 = 0;
1477   PetscBool      hasop;
1478 
1479   PetscFunctionBegin;
1480   if (flag == SOR_APPLY_UPPER) {
1481     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1482     PetscFunctionReturn(0);
1483   }
1484 
1485   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1486     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1487   }
1488 
1489   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1490     if (flag & SOR_ZERO_INITIAL_GUESS) {
1491       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1492       its--;
1493     }
1494 
1495     while (its--) {
1496       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498 
1499       /* update rhs: bb1 = bb - B*x */
1500       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1501       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1502 
1503       /* local sweep */
1504       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1505     }
1506   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1507     if (flag & SOR_ZERO_INITIAL_GUESS) {
1508       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1509       its--;
1510     }
1511     while (its--) {
1512       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1513       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514 
1515       /* update rhs: bb1 = bb - B*x */
1516       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1517       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1518 
1519       /* local sweep */
1520       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1521     }
1522   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1523     if (flag & SOR_ZERO_INITIAL_GUESS) {
1524       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1525       its--;
1526     }
1527     while (its--) {
1528       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1530 
1531       /* update rhs: bb1 = bb - B*x */
1532       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1533       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1534 
1535       /* local sweep */
1536       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1537     }
1538   } else if (flag & SOR_EISENSTAT) {
1539     Vec xx1;
1540 
1541     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1542     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1543 
1544     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1546     if (!mat->diag) {
1547       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1548       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1549     }
1550     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1551     if (hasop) {
1552       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1553     } else {
1554       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1555     }
1556     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1557 
1558     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1559 
1560     /* local sweep */
1561     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1562     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1563     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1564   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1565 
1566   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1567 
1568   matin->errortype = mat->A->errortype;
1569   PetscFunctionReturn(0);
1570 }
1571 
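/*
   MatPermute_MPIAIJ forms B = A(rowp,colp), i.e. row i of B is row rowp[i] of A.  Since the
   permutations are given as the rows/columns each process wants, the routine first inverts them
   with a PetscSF: every process registers its wanted indices as SF leaves and reduces its own
   global indices onto the owners, so rdest[i]/cdest[i] hold the destination of local row/column i.
   A further SF broadcast translates the compressed off-diagonal column map garray, after which the
   preallocation counts dnnz/onnz and the values themselves are shipped to the new owners with
   MatSetValues().
*/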
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatPermute_MPIAIJ"
1574 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1575 {
1576   Mat            aA,aB,Aperm;
1577   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1578   PetscScalar    *aa,*ba;
1579   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1580   PetscSF        rowsf,sf;
1581   IS             parcolp = NULL;
1582   PetscBool      done;
1583   PetscErrorCode ierr;
1584 
1585   PetscFunctionBegin;
1586   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1587   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1588   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1589   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1590 
1591   /* Invert row permutation to find out where my rows should go */
1592   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1593   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1594   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1595   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1596   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1597   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1598 
1599   /* Invert column permutation to find out where my columns should go */
1600   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1601   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1602   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1603   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1604   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1605   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1606   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1607 
1608   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1609   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1610   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1611 
1612   /* Find out where my gcols should go */
1613   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1614   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1615   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1616   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1617   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1618   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1619   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1620   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1621 
1622   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1623   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1624   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1625   for (i=0; i<m; i++) {
1626     PetscInt row = rdest[i],rowner;
1627     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1628     for (j=ai[i]; j<ai[i+1]; j++) {
1629       PetscInt cowner,col = cdest[aj[j]];
1630       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1631       if (rowner == cowner) dnnz[i]++;
1632       else onnz[i]++;
1633     }
1634     for (j=bi[i]; j<bi[i+1]; j++) {
1635       PetscInt cowner,col = gcdest[bj[j]];
1636       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1637       if (rowner == cowner) dnnz[i]++;
1638       else onnz[i]++;
1639     }
1640   }
1641   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1642   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1643   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1644   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1645   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1646 
1647   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1648   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1649   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1650   for (i=0; i<m; i++) {
1651     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1652     PetscInt j0,rowlen;
1653     rowlen = ai[i+1] - ai[i];
1654     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could exceed the number of local rows m (the length of the repurposed arrays), so process the row in batches of at most m entries */
1655       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1656       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1657     }
1658     rowlen = bi[i+1] - bi[i];
1659     for (j0=j=0; j<rowlen; j0=j) {
1660       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1661       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1662     }
1663   }
1664   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1667   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1668   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1669   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1670   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1671   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1672   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1673   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1674   *B = Aperm;
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1680 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1681 {
1682   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1687   if (ghosts) *ghosts = aij->garray;
1688   PetscFunctionReturn(0);
1689 }
1690 
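/*
   MatGetInfo_MPIAIJ adds the statistics of the diagonal block A and the off-diagonal block B into
   one local record and, for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM, combines the five PetscReal entries
   across the communicator with a single MPIU_Allreduce.

   A minimal usage sketch:

       MatInfo info;
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/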
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1693 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1694 {
1695   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1696   Mat            A    = mat->A,B = mat->B;
1697   PetscErrorCode ierr;
1698   PetscReal      isend[5],irecv[5];
1699 
1700   PetscFunctionBegin;
1701   info->block_size = 1.0;
1702   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1703 
1704   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1705   isend[3] = info->memory;  isend[4] = info->mallocs;
1706 
1707   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1708 
1709   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1710   isend[3] += info->memory;  isend[4] += info->mallocs;
1711   if (flag == MAT_LOCAL) {
1712     info->nz_used      = isend[0];
1713     info->nz_allocated = isend[1];
1714     info->nz_unneeded  = isend[2];
1715     info->memory       = isend[3];
1716     info->mallocs      = isend[4];
1717   } else if (flag == MAT_GLOBAL_MAX) {
1718     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1719 
1720     info->nz_used      = irecv[0];
1721     info->nz_allocated = irecv[1];
1722     info->nz_unneeded  = irecv[2];
1723     info->memory       = irecv[3];
1724     info->mallocs      = irecv[4];
1725   } else if (flag == MAT_GLOBAL_SUM) {
1726     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1727 
1728     info->nz_used      = irecv[0];
1729     info->nz_allocated = irecv[1];
1730     info->nz_unneeded  = irecv[2];
1731     info->memory       = irecv[3];
1732     info->mallocs      = irecv[4];
1733   }
1734   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1735   info->fill_ratio_needed = 0;
1736   info->factor_mallocs    = 0;
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatSetOption_MPIAIJ"
1742 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1743 {
1744   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1745   PetscErrorCode ierr;
1746 
1747   PetscFunctionBegin;
1748   switch (op) {
1749   case MAT_NEW_NONZERO_LOCATIONS:
1750   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1751   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1752   case MAT_KEEP_NONZERO_PATTERN:
1753   case MAT_NEW_NONZERO_LOCATION_ERR:
1754   case MAT_USE_INODES:
1755   case MAT_IGNORE_ZERO_ENTRIES:
1756     MatCheckPreallocated(A,1);
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_ROW_ORIENTED:
1761     MatCheckPreallocated(A,1);
1762     a->roworiented = flg;
1763 
1764     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1765     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1766     break;
1767   case MAT_NEW_DIAGONALS:
1768     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1769     break;
1770   case MAT_IGNORE_OFF_PROC_ENTRIES:
1771     a->donotstash = flg;
1772     break;
1773   case MAT_SPD:
1774     A->spd_set = PETSC_TRUE;
1775     A->spd     = flg;
1776     if (flg) {
1777       A->symmetric                  = PETSC_TRUE;
1778       A->structurally_symmetric     = PETSC_TRUE;
1779       A->symmetric_set              = PETSC_TRUE;
1780       A->structurally_symmetric_set = PETSC_TRUE;
1781     }
1782     break;
1783   case MAT_SYMMETRIC:
1784     MatCheckPreallocated(A,1);
1785     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1786     break;
1787   case MAT_STRUCTURALLY_SYMMETRIC:
1788     MatCheckPreallocated(A,1);
1789     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1790     break;
1791   case MAT_HERMITIAN:
1792     MatCheckPreallocated(A,1);
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_SYMMETRY_ETERNAL:
1796     MatCheckPreallocated(A,1);
1797     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1798     break;
1799   default:
1800     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1801   }
1802   PetscFunctionReturn(0);
1803 }
1804 
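/*
   MatGetRow_MPIAIJ returns a locally owned row in global column ordering by merging the diagonal
   block row (column indices shifted by cstart) with the off-diagonal block row, whose compressed
   column indices are translated through garray: off-diagonal entries with global column < cstart
   come first, then the diagonal block entries, then the remaining off-diagonal entries.
*/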
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatGetRow_MPIAIJ"
1807 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1808 {
1809   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1810   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1811   PetscErrorCode ierr;
1812   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1813   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1814   PetscInt       *cmap,*idx_p;
1815 
1816   PetscFunctionBegin;
1817   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1818   mat->getrowactive = PETSC_TRUE;
1819 
1820   if (!mat->rowvalues && (idx || v)) {
1821     /*
1822         allocate enough space to hold information from the longest row.
1823     */
1824     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1825     PetscInt   max = 1,tmp;
1826     for (i=0; i<matin->rmap->n; i++) {
1827       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1828       if (max < tmp) max = tmp;
1829     }
1830     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1831   }
1832 
1833   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1834   lrow = row - rstart;
1835 
1836   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1837   if (!v)   {pvA = 0; pvB = 0;}
1838   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1839   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1840   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1841   nztot = nzA + nzB;
1842 
1843   cmap = mat->garray;
1844   if (v  || idx) {
1845     if (nztot) {
1846       /* Sort by increasing column numbers, assuming A and B already sorted */
1847       PetscInt imark = -1;
1848       if (v) {
1849         *v = v_p = mat->rowvalues;
1850         for (i=0; i<nzB; i++) {
1851           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1852           else break;
1853         }
1854         imark = i;
1855         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1856         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1857       }
1858       if (idx) {
1859         *idx = idx_p = mat->rowindices;
1860         if (imark > -1) {
1861           for (i=0; i<imark; i++) {
1862             idx_p[i] = cmap[cworkB[i]];
1863           }
1864         } else {
1865           for (i=0; i<nzB; i++) {
1866             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1867             else break;
1868           }
1869           imark = i;
1870         }
1871         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1872         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1873       }
1874     } else {
1875       if (idx) *idx = 0;
1876       if (v)   *v   = 0;
1877     }
1878   }
1879   *nz  = nztot;
1880   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1881   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1887 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1888 {
1889   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1890 
1891   PetscFunctionBegin;
1892   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1893   aij->getrowactive = PETSC_FALSE;
1894   PetscFunctionReturn(0);
1895 }
1896 
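/*
   MatNorm_MPIAIJ assembles each norm from per-process contributions of the two local blocks:
      NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2), local sums of squares combined with MPIU_SUM
      NORM_1        : max_j sum_i |a_ij|,    per-column sums combined with MPIU_SUM, then the max
      NORM_INFINITY : max_i sum_j |a_ij|,    per-row sums combined with MPIU_MAX
   The two norm would require a singular value computation and is not supported.
*/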
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatNorm_MPIAIJ"
1899 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1900 {
1901   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1902   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1903   PetscErrorCode ierr;
1904   PetscInt       i,j,cstart = mat->cmap->rstart;
1905   PetscReal      sum = 0.0;
1906   MatScalar      *v;
1907 
1908   PetscFunctionBegin;
1909   if (aij->size == 1) {
1910     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1911   } else {
1912     if (type == NORM_FROBENIUS) {
1913       v = amat->a;
1914       for (i=0; i<amat->nz; i++) {
1915         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1916       }
1917       v = bmat->a;
1918       for (i=0; i<bmat->nz; i++) {
1919         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1920       }
1921       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1922       *norm = PetscSqrtReal(*norm);
1923     } else if (type == NORM_1) { /* max column norm */
1924       PetscReal *tmp,*tmp2;
1925       PetscInt  *jj,*garray = aij->garray;
1926       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1927       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1928       *norm = 0.0;
1929       v     = amat->a; jj = amat->j;
1930       for (j=0; j<amat->nz; j++) {
1931         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1932       }
1933       v = bmat->a; jj = bmat->j;
1934       for (j=0; j<bmat->nz; j++) {
1935         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1936       }
1937       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1938       for (j=0; j<mat->cmap->N; j++) {
1939         if (tmp2[j] > *norm) *norm = tmp2[j];
1940       }
1941       ierr = PetscFree(tmp);CHKERRQ(ierr);
1942       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1943     } else if (type == NORM_INFINITY) { /* max row norm */
1944       PetscReal ntemp = 0.0;
1945       for (j=0; j<aij->A->rmap->n; j++) {
1946         v   = amat->a + amat->i[j];
1947         sum = 0.0;
1948         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1949           sum += PetscAbsScalar(*v); v++;
1950         }
1951         v = bmat->a + bmat->i[j];
1952         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1953           sum += PetscAbsScalar(*v); v++;
1954         }
1955         if (sum > ntemp) ntemp = sum;
1956       }
1957       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1958     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1959   }
1960   PetscFunctionReturn(0);
1961 }
1962 
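/*
   MatTranspose_MPIAIJ preallocates the transpose by counting, for every local column of A, the
   entries contributed by the diagonal block (d_nnz) and by the off-diagonal block; the latter are
   counted per ghost column (g_nnz) and summed onto the owning processes with a PetscSF reduce to
   give o_nnz.  The values are then inserted one source row at a time via
   MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES), i.e. as a column of the transpose.
*/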
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatTranspose_MPIAIJ"
1965 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1966 {
1967   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1968   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1969   PetscErrorCode ierr;
1970   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1971   PetscInt       cstart = A->cmap->rstart,ncol;
1972   Mat            B;
1973   MatScalar      *array;
1974 
1975   PetscFunctionBegin;
1976   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1977 
1978   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1979   ai = Aloc->i; aj = Aloc->j;
1980   bi = Bloc->i; bj = Bloc->j;
1981   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1982     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1983     PetscSFNode          *oloc;
1984     PETSC_UNUSED PetscSF sf;
1985 
1986     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1987     /* compute d_nnz for preallocation */
1988     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1989     for (i=0; i<ai[ma]; i++) {
1990       d_nnz[aj[i]]++;
1991       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1992     }
1993     /* compute local off-diagonal contributions */
1994     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1995     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1996     /* map those to global */
1997     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1998     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1999     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2000     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2001     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2002     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2003     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2004 
2005     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2006     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2007     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2008     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2009     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2010     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2011   } else {
2012     B    = *matout;
2013     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2014     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2015   }
2016 
2017   /* copy over the A part */
2018   array = Aloc->a;
2019   row   = A->rmap->rstart;
2020   for (i=0; i<ma; i++) {
2021     ncol = ai[i+1]-ai[i];
2022     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2023     row++;
2024     array += ncol; aj += ncol;
2025   }
2026   aj = Aloc->j;
2027   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
2028 
2029   /* copy over the B part */
2030   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2031   array = Bloc->a;
2032   row   = A->rmap->rstart;
2033   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2034   cols_tmp = cols;
2035   for (i=0; i<mb; i++) {
2036     ncol = bi[i+1]-bi[i];
2037     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2038     row++;
2039     array += ncol; cols_tmp += ncol;
2040   }
2041   ierr = PetscFree(cols);CHKERRQ(ierr);
2042 
2043   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2045   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2046     *matout = B;
2047   } else {
2048     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2049   }
2050   PetscFunctionReturn(0);
2051 }
2052 
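/*
   MatDiagonalScale_MPIAIJ computes mat = diag(ll)*mat*diag(rr).  Left scaling touches only locally
   owned rows, so it is applied to both blocks right away; right scaling of the off-diagonal block
   needs ghost entries of rr, so the scatter of rr into lvec is started first and that block is
   scaled only after the scatter has completed, overlapping communication with computation.

   A minimal usage sketch (l and r conform to the row and column layouts of mat, respectively):

       ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
*/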
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2055 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2056 {
2057   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2058   Mat            a    = aij->A,b = aij->B;
2059   PetscErrorCode ierr;
2060   PetscInt       s1,s2,s3;
2061 
2062   PetscFunctionBegin;
2063   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2064   if (rr) {
2065     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2066     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2067     /* Overlap communication with computation. */
2068     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2069   }
2070   if (ll) {
2071     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2072     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2073     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2074   }
2075   /* scale the diagonal block */
2076   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2077 
2078   if (rr) {
2079     /* Do a scatter end and then right scale the off-diagonal block */
2080     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2081     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2082   }
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2088 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2089 {
2090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2091   PetscErrorCode ierr;
2092 
2093   PetscFunctionBegin;
2094   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatEqual_MPIAIJ"
2100 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2101 {
2102   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2103   Mat            a,b,c,d;
2104   PetscBool      flg;
2105   PetscErrorCode ierr;
2106 
2107   PetscFunctionBegin;
2108   a = matA->A; b = matA->B;
2109   c = matB->A; d = matB->B;
2110 
2111   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2112   if (flg) {
2113     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2114   }
2115   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatCopy_MPIAIJ"
2121 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2122 {
2123   PetscErrorCode ierr;
2124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2125   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2126 
2127   PetscFunctionBegin;
2128   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2129   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2130     /* because of the column compression in the off-processor part of the matrix a->B,
2131        the number of columns in a->B and b->B may be different, hence we cannot call
2132        MatCopy() directly on the two parts. If need be, a more efficient copy than
2133        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2134        and then copying the submatrices */
2135     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2136   } else {
2137     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2138     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2139   }
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 #undef __FUNCT__
2144 #define __FUNCT__ "MatSetUp_MPIAIJ"
2145 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2146 {
2147   PetscErrorCode ierr;
2148 
2149   PetscFunctionBegin;
2150   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 /*
2155    Computes the number of nonzeros per row needed for preallocation when X and Y
2156    have different nonzero structure.
2157 */
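/*
   For example, if row i has (mapped to global) column indices
      X: {0, 3, 7}   and   Y: {3, 5}
   the merged pattern is {0, 3, 5, 7}, so nnz[i] = 4; the shared column 3 is counted only once.
*/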
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2160 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2161 {
2162   PetscInt       i,j,k,nzx,nzy;
2163 
2164   PetscFunctionBegin;
2165   /* Set the number of nonzeros in the new matrix */
2166   for (i=0; i<m; i++) {
2167     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2168     nzx = xi[i+1] - xi[i];
2169     nzy = yi[i+1] - yi[i];
2170     nnz[i] = 0;
2171     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2172       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2173       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2174       nnz[i]++;
2175     }
2176     for (; k<nzy; k++) nnz[i]++;
2177   }
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2184 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2185 {
2186   PetscErrorCode ierr;
2187   PetscInt       m = Y->rmap->N;
2188   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2189   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2190 
2191   PetscFunctionBegin;
2192   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatAXPY_MPIAIJ"
2198 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2199 {
2200   PetscErrorCode ierr;
2201   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2202   PetscBLASInt   bnz,one=1;
2203   Mat_SeqAIJ     *x,*y;
2204 
2205   PetscFunctionBegin;
2206   if (str == SAME_NONZERO_PATTERN) {
2207     PetscScalar alpha = a;
2208     x    = (Mat_SeqAIJ*)xx->A->data;
2209     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2210     y    = (Mat_SeqAIJ*)yy->A->data;
2211     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2212     x    = (Mat_SeqAIJ*)xx->B->data;
2213     y    = (Mat_SeqAIJ*)yy->B->data;
2214     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2215     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2216     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2217   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2218     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2219   } else {
2220     Mat      B;
2221     PetscInt *nnz_d,*nnz_o;
2222     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2223     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2224     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2225     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2226     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2227     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2228     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2229     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2230     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2231     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2232     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2233     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2234     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2235     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2236   }
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2241 
2242 #undef __FUNCT__
2243 #define __FUNCT__ "MatConjugate_MPIAIJ"
2244 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2245 {
2246 #if defined(PETSC_USE_COMPLEX)
2247   PetscErrorCode ierr;
2248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2249 
2250   PetscFunctionBegin;
2251   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2252   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2253 #else
2254   PetscFunctionBegin;
2255 #endif
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatRealPart_MPIAIJ"
2261 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2262 {
2263   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2264   PetscErrorCode ierr;
2265 
2266   PetscFunctionBegin;
2267   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2268   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2274 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2275 {
2276   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2277   PetscErrorCode ierr;
2278 
2279   PetscFunctionBegin;
2280   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2281   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2287 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2288 {
2289   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2290   PetscErrorCode ierr;
2291   PetscInt       i,*idxb = 0;
2292   PetscScalar    *va,*vb;
2293   Vec            vtmp;
2294 
2295   PetscFunctionBegin;
2296   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2297   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2298   if (idx) {
2299     for (i=0; i<A->rmap->n; i++) {
2300       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2301     }
2302   }
2303 
2304   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2305   if (idx) {
2306     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2307   }
2308   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2309   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2310 
2311   for (i=0; i<A->rmap->n; i++) {
2312     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2313       va[i] = vb[i];
2314       if (idx) idx[i] = a->garray[idxb[i]];
2315     }
2316   }
2317 
2318   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2319   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2320   ierr = PetscFree(idxb);CHKERRQ(ierr);
2321   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2327 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2328 {
2329   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2330   PetscErrorCode ierr;
2331   PetscInt       i,*idxb = 0;
2332   PetscScalar    *va,*vb;
2333   Vec            vtmp;
2334 
2335   PetscFunctionBegin;
2336   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2337   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2338   if (idx) {
2339     for (i=0; i<A->rmap->n; i++) { /* idx and v have one entry per locally owned row */
2340       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2341     }
2342   }
2343 
2344   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2345   if (idx) {
2346     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2347   }
2348   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2349   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2350 
2351   for (i=0; i<A->rmap->n; i++) {
2352     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2353       va[i] = vb[i];
2354       if (idx) idx[i] = a->garray[idxb[i]];
2355     }
2356   }
2357 
2358   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2359   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2360   ierr = PetscFree(idxb);CHKERRQ(ierr);
2361   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2367 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2368 {
2369   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2370   PetscInt       n      = A->rmap->n;
2371   PetscInt       cstart = A->cmap->rstart;
2372   PetscInt       *cmap  = mat->garray;
2373   PetscInt       *diagIdx, *offdiagIdx;
2374   Vec            diagV, offdiagV;
2375   PetscScalar    *a, *diagA, *offdiagA;
2376   PetscInt       r;
2377   PetscErrorCode ierr;
2378 
2379   PetscFunctionBegin;
2380   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2381   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* VecCreateSeq() requires a single-process communicator */
2382   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2383   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2384   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2385   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2386   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2387   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2388   for (r = 0; r < n; ++r) {
2389     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2390       a[r]   = diagA[r];
2391       idx[r] = cstart + diagIdx[r];
2392     } else {
2393       a[r]   = offdiagA[r];
2394       idx[r] = cmap[offdiagIdx[r]];
2395     }
2396   }
2397   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2398   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2399   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2400   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2401   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2402   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2408 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2409 {
2410   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2411   PetscInt       n      = A->rmap->n;
2412   PetscInt       cstart = A->cmap->rstart;
2413   PetscInt       *cmap  = mat->garray;
2414   PetscInt       *diagIdx, *offdiagIdx;
2415   Vec            diagV, offdiagV;
2416   PetscScalar    *a, *diagA, *offdiagA;
2417   PetscInt       r;
2418   PetscErrorCode ierr;
2419 
2420   PetscFunctionBegin;
2421   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2422   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2423   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2424   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2425   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2426   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2427   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2428   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2429   for (r = 0; r < n; ++r) {
2430     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2431       a[r]   = diagA[r];
2432       idx[r] = cstart + diagIdx[r];
2433     } else {
2434       a[r]   = offdiagA[r];
2435       idx[r] = cmap[offdiagIdx[r]];
2436     }
2437   }
2438   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2439   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2440   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2441   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2442   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2443   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2449 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2450 {
2451   PetscErrorCode ierr;
2452   Mat            *dummy;
2453 
2454   PetscFunctionBegin;
2455   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2456   *newmat = *dummy;
2457   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2463 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2464 {
2465   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2466   PetscErrorCode ierr;
2467 
2468   PetscFunctionBegin;
2469   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2470   A->errortype = a->A->errortype;
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 #undef __FUNCT__
2475 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2476 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2477 {
2478   PetscErrorCode ierr;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2480 
2481   PetscFunctionBegin;
2482   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2483   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2484   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 #undef __FUNCT__
2490 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2491 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2492 {
2493   PetscFunctionBegin;
2494   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2495   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2501 /*@
2502    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2503 
2504    Collective on Mat
2505 
2506    Input Parameters:
2507 +    A - the matrix
2508 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2509 
2510  Level: advanced
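   Options Database Keys:
. -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap
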
2511 
2512 @*/
2513 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2514 {
2515   PetscErrorCode       ierr;
2516 
2517   PetscFunctionBegin;
2518   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2524 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2525 {
2526   PetscErrorCode       ierr;
2527   PetscBool            sc = PETSC_FALSE,flg;
2528 
2529   PetscFunctionBegin;
2530   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2531   ierr = PetscObjectOptionsBegin((PetscObject)A);
2532     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2533     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2534     if (flg) {
2535       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2536     }
2537   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2538   PetscFunctionReturn(0);
2539 }
2540 
2541 #undef __FUNCT__
2542 #define __FUNCT__ "MatShift_MPIAIJ"
2543 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2544 {
2545   PetscErrorCode ierr;
2546   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2547   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2548 
2549   PetscFunctionBegin;
2550   if (!Y->preallocated) {
2551     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2552   } else if (!aij->nz) {
2553     PetscInt nonew = aij->nonew;
2554     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2555     aij->nonew = nonew;
2556   }
2557   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2563 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2564 {
2565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2566   PetscErrorCode ierr;
2567 
2568   PetscFunctionBegin;
2569   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2570   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2571   if (d) {
2572     PetscInt rstart;
2573     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2574     *d += rstart;
2575 
2576   }
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 
2581 /* -------------------------------------------------------------------*/
2582 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2583                                        MatGetRow_MPIAIJ,
2584                                        MatRestoreRow_MPIAIJ,
2585                                        MatMult_MPIAIJ,
2586                                 /* 4*/ MatMultAdd_MPIAIJ,
2587                                        MatMultTranspose_MPIAIJ,
2588                                        MatMultTransposeAdd_MPIAIJ,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                 /*10*/ 0,
2593                                        0,
2594                                        0,
2595                                        MatSOR_MPIAIJ,
2596                                        MatTranspose_MPIAIJ,
2597                                 /*15*/ MatGetInfo_MPIAIJ,
2598                                        MatEqual_MPIAIJ,
2599                                        MatGetDiagonal_MPIAIJ,
2600                                        MatDiagonalScale_MPIAIJ,
2601                                        MatNorm_MPIAIJ,
2602                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2603                                        MatAssemblyEnd_MPIAIJ,
2604                                        MatSetOption_MPIAIJ,
2605                                        MatZeroEntries_MPIAIJ,
2606                                 /*24*/ MatZeroRows_MPIAIJ,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                 /*29*/ MatSetUp_MPIAIJ,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                 /*34*/ MatDuplicate_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                 /*39*/ MatAXPY_MPIAIJ,
2622                                        MatGetSubMatrices_MPIAIJ,
2623                                        MatIncreaseOverlap_MPIAIJ,
2624                                        MatGetValues_MPIAIJ,
2625                                        MatCopy_MPIAIJ,
2626                                 /*44*/ MatGetRowMax_MPIAIJ,
2627                                        MatScale_MPIAIJ,
2628                                        MatShift_MPIAIJ,
2629                                        MatDiagonalSet_MPIAIJ,
2630                                        MatZeroRowsColumns_MPIAIJ,
2631                                 /*49*/ MatSetRandom_MPIAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2637                                        0,
2638                                        MatSetUnfactored_MPIAIJ,
2639                                        MatPermute_MPIAIJ,
2640                                        0,
2641                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2642                                        MatDestroy_MPIAIJ,
2643                                        MatView_MPIAIJ,
2644                                        0,
2645                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2646                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2647                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2652                                        MatGetRowMinAbs_MPIAIJ,
2653                                        0,
2654                                        MatSetColoring_MPIAIJ,
2655                                        0,
2656                                        MatSetValuesAdifor_MPIAIJ,
2657                                 /*75*/ MatFDColoringApply_AIJ,
2658                                        MatSetFromOptions_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        MatFindZeroDiagonals_MPIAIJ,
2662                                 /*80*/ 0,
2663                                        0,
2664                                        0,
2665                                 /*83*/ MatLoad_MPIAIJ,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2672                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2673                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2674                                        MatPtAP_MPIAIJ_MPIAIJ,
2675                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2676                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                 /*99*/ 0,
2682                                        0,
2683                                        0,
2684                                        MatConjugate_MPIAIJ,
2685                                        0,
2686                                 /*104*/MatSetValuesRow_MPIAIJ,
2687                                        MatRealPart_MPIAIJ,
2688                                        MatImaginaryPart_MPIAIJ,
2689                                        0,
2690                                        0,
2691                                 /*109*/0,
2692                                        0,
2693                                        MatGetRowMin_MPIAIJ,
2694                                        0,
2695                                        MatMissingDiagonal_MPIAIJ,
2696                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2697                                        0,
2698                                        MatGetGhosts_MPIAIJ,
2699                                        0,
2700                                        0,
2701                                 /*119*/0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        MatGetMultiProcBlock_MPIAIJ,
2706                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2707                                        MatGetColumnNorms_MPIAIJ,
2708                                        MatInvertBlockDiagonal_MPIAIJ,
2709                                        0,
2710                                        MatGetSubMatricesMPI_MPIAIJ,
2711                                 /*129*/0,
2712                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2713                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2714                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2715                                        0,
2716                                 /*134*/0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                 /*139*/0,
2722                                        0,
2723                                        0,
2724                                        MatFDColoringSetUp_MPIXAIJ,
2725                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2726                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2727 };
2728 
2729 /* ----------------------------------------------------------------------------------------*/
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2733 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2734 {
2735   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2736   PetscErrorCode ierr;
2737 
2738   PetscFunctionBegin;
2739   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2740   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 #undef __FUNCT__
2745 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2746 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2747 {
2748   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2749   PetscErrorCode ierr;
2750 
2751   PetscFunctionBegin;
2752   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2753   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2754   PetscFunctionReturn(0);
2755 }
2756 
2757 #undef __FUNCT__
2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2759 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2760 {
2761   Mat_MPIAIJ     *b;
2762   PetscErrorCode ierr;
2763 
2764   PetscFunctionBegin;
2765   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2766   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2767   b = (Mat_MPIAIJ*)B->data;
2768 
2769   if (!B->preallocated) {
2770     /* Explicitly create 2 MATSEQAIJ matrices. */
2771     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2772     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2773     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2774     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2775     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2776     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2777     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2778     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2779     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2780     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2781   }
2782 
2783   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2784   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2785   B->preallocated = PETSC_TRUE;
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2791 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2792 {
2793   Mat            mat;
2794   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2795   PetscErrorCode ierr;
2796 
2797   PetscFunctionBegin;
2798   *newmat = 0;
2799   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2800   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2801   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2802   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2803   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2804   a       = (Mat_MPIAIJ*)mat->data;
2805 
2806   mat->factortype   = matin->factortype;
2807   mat->assembled    = PETSC_TRUE;
2808   mat->insertmode   = NOT_SET_VALUES;
2809   mat->preallocated = PETSC_TRUE;
2810 
2811   a->size         = oldmat->size;
2812   a->rank         = oldmat->rank;
2813   a->donotstash   = oldmat->donotstash;
2814   a->roworiented  = oldmat->roworiented;
2815   a->rowindices   = 0;
2816   a->rowvalues    = 0;
2817   a->getrowactive = PETSC_FALSE;
2818 
2819   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2821 
2822   if (oldmat->colmap) {
2823 #if defined(PETSC_USE_CTABLE)
2824     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2825 #else
2826     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2827     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2828     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2829 #endif
2830   } else a->colmap = 0;
2831   if (oldmat->garray) {
2832     PetscInt len;
2833     len  = oldmat->B->cmap->n;
2834     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2835     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2836     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2837   } else a->garray = 0;
2838 
2839   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2840   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2841   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2842   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2843   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2844   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2845   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2846   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2847   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2848   *newmat = mat;
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 
2853 
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatLoad_MPIAIJ"
2856 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2857 {
2858   PetscScalar    *vals,*svals;
2859   MPI_Comm       comm;
2860   PetscErrorCode ierr;
2861   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2862   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2863   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2864   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2865   PetscInt       cend,cstart,n,*rowners;
2866   int            fd;
2867   PetscInt       bs = newMat->rmap->bs;
2868 
2869   PetscFunctionBegin;
2870   /* force binary viewer to load .info file if it has not yet done so */
2871   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2872   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2874   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2875   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2876   if (!rank) {
2877     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2878     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2879   }
2880 
2881   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2882   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2883   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2884   if (bs < 0) bs = 1;
2885 
2886   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2887   M    = header[1]; N = header[2];
2888 
2889   /* If global sizes are set, check if they are consistent with that given in the file */
2890   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2891   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2892 
2893   /* determine ownership of all (block) rows */
2894   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2895   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2896   else m = newMat->rmap->n; /* Set by user */
2897 
2898   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2899   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2900 
2901   /* First process needs enough room for process with most rows */
2902   if (!rank) {
2903     mmax = rowners[1];
2904     for (i=2; i<=size; i++) {
2905       mmax = PetscMax(mmax, rowners[i]);
2906     }
2907   } else mmax = -1;             /* unused, but compilers complain */
2908 
2909   rowners[0] = 0;
2910   for (i=2; i<=size; i++) {
2911     rowners[i] += rowners[i-1];
2912   }
2913   rstart = rowners[rank];
2914   rend   = rowners[rank+1];
2915 
2916   /* distribute row lengths to all processors */
2917   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2918   if (!rank) {
2919     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2920     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2921     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2922     for (j=0; j<m; j++) {
2923       procsnz[0] += ourlens[j];
2924     }
2925     for (i=1; i<size; i++) {
2926       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2927       /* calculate the number of nonzeros on each processor */
2928       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2929         procsnz[i] += rowlengths[j];
2930       }
2931       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2932     }
2933     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2934   } else {
2935     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2936   }
2937 
2938   if (!rank) {
2939     /* determine max buffer needed and allocate it */
2940     maxnz = 0;
2941     for (i=0; i<size; i++) {
2942       maxnz = PetscMax(maxnz,procsnz[i]);
2943     }
2944     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2945 
2946     /* read in my part of the matrix column indices  */
2947     nz   = procsnz[0];
2948     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2949     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2950 
2951     /* read in everyone else's parts and ship them off */
2952     for (i=1; i<size; i++) {
2953       nz   = procsnz[i];
2954       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2955       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2956     }
2957     ierr = PetscFree(cols);CHKERRQ(ierr);
2958   } else {
2959     /* determine buffer space needed for message */
2960     nz = 0;
2961     for (i=0; i<m; i++) {
2962       nz += ourlens[i];
2963     }
2964     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2965 
2966     /* receive message of column indices*/
2967     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2968   }
2969 
2970   /* determine column ownership if matrix is not square */
2971   if (N != M) {
2972     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2973     else n = newMat->cmap->n;
2974     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2975     cstart = cend - n;
2976   } else {
2977     cstart = rstart;
2978     cend   = rend;
2979     n      = cend - cstart;
2980   }
2981 
2982   /* loop over local rows, determining number of off diagonal entries */
2983   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2984   jj   = 0;
2985   for (i=0; i<m; i++) {
2986     for (j=0; j<ourlens[i]; j++) {
2987       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2988       jj++;
2989     }
2990   }
2991 
2992   for (i=0; i<m; i++) {
2993     ourlens[i] -= offlens[i];
2994   }
2995   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2996 
2997   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2998 
2999   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3000 
3001   for (i=0; i<m; i++) {
3002     ourlens[i] += offlens[i];
3003   }
3004 
3005   if (!rank) {
3006     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3007 
3008     /* read in my part of the matrix numerical values  */
3009     nz   = procsnz[0];
3010     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3011 
3012     /* insert into matrix */
3013     jj      = rstart;
3014     smycols = mycols;
3015     svals   = vals;
3016     for (i=0; i<m; i++) {
3017       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3018       smycols += ourlens[i];
3019       svals   += ourlens[i];
3020       jj++;
3021     }
3022 
3023     /* read in other processors and ship out */
3024     for (i=1; i<size; i++) {
3025       nz   = procsnz[i];
3026       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3027       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3028     }
3029     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3030   } else {
3031     /* receive numeric values */
3032     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3033 
3034     /* receive message of values*/
3035     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3036 
3037     /* insert into matrix */
3038     jj      = rstart;
3039     smycols = mycols;
3040     svals   = vals;
3041     for (i=0; i<m; i++) {
3042       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3043       smycols += ourlens[i];
3044       svals   += ourlens[i];
3045       jj++;
3046     }
3047   }
3048   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3049   ierr = PetscFree(vals);CHKERRQ(ierr);
3050   ierr = PetscFree(mycols);CHKERRQ(ierr);
3051   ierr = PetscFree(rowners);CHKERRQ(ierr);
3052   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3053   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 #undef __FUNCT__
3058 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3059 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3060 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3061 {
3062   PetscErrorCode ierr;
3063   IS             iscol_local;
3064   PetscInt       csize;
3065 
3066   PetscFunctionBegin;
3067   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3068   if (call == MAT_REUSE_MATRIX) {
3069     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3070     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3071   } else {
3072     /* check if we are grabbing all columns*/
3073     PetscBool    isstride;
3074     PetscMPIInt  lisstride = 0,gisstride;
3075     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3076     if (isstride) {
3077       PetscInt  start,len,mstart,mlen;
3078       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3079       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3080       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3081       if (mstart == start && mlen-mstart == len) lisstride = 1;
3082     }
3083     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3084     if (gisstride) {
3085       PetscInt N;
3086       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3087       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3088       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3089       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3090     } else {
3091       PetscInt cbs;
3092       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3093       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3094       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3095     }
3096   }
3097   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3098   if (call == MAT_INITIAL_MATRIX) {
3099     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3100     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3101   }
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3106 #undef __FUNCT__
3107 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3108 /*
3109     Not great since it makes two copies of the submatrix: first a SeqAIJ
3110   locally, and then the end result by concatenating the local matrices.
3111   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3112 
3113   Note: This requires a sequential iscol with all indices.
3114 */
3115 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3116 {
3117   PetscErrorCode ierr;
3118   PetscMPIInt    rank,size;
3119   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3120   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3121   PetscBool      allcolumns, colflag;
3122   Mat            M,Mreuse;
3123   MatScalar      *vwork,*aa;
3124   MPI_Comm       comm;
3125   Mat_SeqAIJ     *aij;
3126 
3127   PetscFunctionBegin;
3128   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3129   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3130   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3131 
3132   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3133   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3134   if (colflag && ncol == mat->cmap->N) {
3135     allcolumns = PETSC_TRUE;
3136     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3137   } else {
3138     allcolumns = PETSC_FALSE;
3139   }
3140   if (call ==  MAT_REUSE_MATRIX) {
3141     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3142     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3143     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3144   } else {
3145     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3146   }
3147 
3148   /*
3149       m - number of local rows
3150       n - number of columns (same on all processors)
3151       rstart - first row in new global matrix generated
3152   */
3153   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3154   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3155   if (call == MAT_INITIAL_MATRIX) {
3156     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3157     ii  = aij->i;
3158     jj  = aij->j;
3159 
3160     /*
3161         Determine the number of non-zeros in the diagonal and off-diagonal
3162         portions of the matrix in order to do correct preallocation
3163     */
3164 
3165     /* first get start and end of "diagonal" columns */
3166     if (csize == PETSC_DECIDE) {
3167       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3168       if (mglobal == n) { /* square matrix */
3169         nlocal = m;
3170       } else {
3171         nlocal = n/size + ((n % size) > rank);
3172       }
3173     } else {
3174       nlocal = csize;
3175     }
3176     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3177     rstart = rend - nlocal;
3178     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3179 
3180     /* next, compute all the lengths */
3181     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3182     olens = dlens + m;
3183     for (i=0; i<m; i++) {
3184       jend = ii[i+1] - ii[i];
3185       olen = 0;
3186       dlen = 0;
3187       for (j=0; j<jend; j++) {
3188         if (*jj < rstart || *jj >= rend) olen++;
3189         else dlen++;
3190         jj++;
3191       }
3192       olens[i] = olen;
3193       dlens[i] = dlen;
3194     }
3195     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3196     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3197     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3198     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3199     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3200     ierr = PetscFree(dlens);CHKERRQ(ierr);
3201   } else {
3202     PetscInt ml,nl;
3203 
3204     M    = *newmat;
3205     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3206     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3207     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3208     /*
3209          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3210        rather than the slower MatSetValues().
3211     */
3212     M->was_assembled = PETSC_TRUE;
3213     M->assembled     = PETSC_FALSE;
3214   }
3215   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3216   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3217   ii   = aij->i;
3218   jj   = aij->j;
3219   aa   = aij->a;
3220   for (i=0; i<m; i++) {
3221     row   = rstart + i;
3222     nz    = ii[i+1] - ii[i];
3223     cwork = jj;     jj += nz;
3224     vwork = aa;     aa += nz;
3225     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3226   }
3227 
3228   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3229   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3230   *newmat = M;
3231 
3232   /* save submatrix used in processor for next request */
3233   if (call ==  MAT_INITIAL_MATRIX) {
3234     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3235     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3236   }
3237   PetscFunctionReturn(0);
3238 }
3239 
3240 #undef __FUNCT__
3241 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3242 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3243 {
3244   PetscInt       m,cstart, cend,j,nnz,i,d;
3245   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3246   const PetscInt *JJ;
3247   PetscScalar    *values;
3248   PetscErrorCode ierr;
3249 
3250   PetscFunctionBegin;
3251   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3252 
3253   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3254   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3255   m      = B->rmap->n;
3256   cstart = B->cmap->rstart;
3257   cend   = B->cmap->rend;
3258   rstart = B->rmap->rstart;
3259 
3260   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3261 
3262 #if defined(PETSC_USE_DEBUG)
3263   for (i=0; i<m; i++) {
3264     nnz = Ii[i+1]- Ii[i];
3265     JJ  = J + Ii[i];
3266     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3267     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3268     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3269   }
3270 #endif
3271 
3272   for (i=0; i<m; i++) {
3273     nnz     = Ii[i+1]- Ii[i];
3274     JJ      = J + Ii[i];
3275     nnz_max = PetscMax(nnz_max,nnz);
3276     d       = 0;
3277     for (j=0; j<nnz; j++) {
3278       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3279     }
3280     d_nnz[i] = d;
3281     o_nnz[i] = nnz - d;
3282   }
3283   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3284   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3285 
3286   if (v) values = (PetscScalar*)v;
3287   else {
3288     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3289   }
3290 
3291   for (i=0; i<m; i++) {
3292     ii   = i + rstart;
3293     nnz  = Ii[i+1]- Ii[i];
3294     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3295   }
3296   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3297   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3298 
3299   if (!v) {
3300     ierr = PetscFree(values);CHKERRQ(ierr);
3301   }
3302   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3303   PetscFunctionReturn(0);
3304 }
3305 
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3308 /*@
3309    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3310    (the default parallel PETSc format).
3311 
3312    Collective on MPI_Comm
3313 
3314    Input Parameters:
3315 +  B - the matrix
3316 .  i - the indices into j for the start of each local row (starts with zero)
3317 .  j - the column indices for each local row (starts with zero)
3318 -  v - optional values in the matrix
3319 
3320    Level: developer
3321 
3322    Notes:
3323        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3324      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3325      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3326 
3327        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3328 
3329        The format which is used for the sparse matrix input is equivalent to a
3330     row-major ordering, i.e. for the following matrix, the input data expected is
3331     as shown:
3332 
3333 $        1 0 0
3334 $        2 0 3     P0
3335 $       -------
3336 $        4 5 6     P1
3337 $
3338 $     Process0 [P0]: rows_owned=[0,1]
3339 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3340 $        j =  {0,0,2}  [size = 3]
3341 $        v =  {1,2,3}  [size = 3]
3342 $
3343 $     Process1 [P1]: rows_owned=[2]
3344 $        i =  {0,3}    [size = nrow+1  = 1+1]
3345 $        j =  {0,1,2}  [size = 3]
3346 $        v =  {4,5,6}  [size = 3]
3347 
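     A minimal call-sequence sketch for the example above (error checking is elided; nrow is 2 on
     process P0 and 1 on process P1, and i, j, v are that process's local arrays listed above):
$     MatCreate(PETSC_COMM_WORLD,&B);
$     MatSetSizes(B,nrow,PETSC_DECIDE,PETSC_DETERMINE,3);
$     MatSetType(B,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(B,i,j,v);
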
3348 .keywords: matrix, aij, compressed row, sparse, parallel
3349 
3350 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3351           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3352 @*/
3353 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3354 {
3355   PetscErrorCode ierr;
3356 
3357   PetscFunctionBegin;
3358   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 #undef __FUNCT__
3363 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3364 /*@C
3365    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3366    (the default parallel PETSc format).  For good matrix assembly performance
3367    the user should preallocate the matrix storage by setting the parameters
3368    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3369    performance can be increased by more than a factor of 50.
3370 
3371    Collective on MPI_Comm
3372 
3373    Input Parameters:
3374 +  B - the matrix
3375 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3376            (same value is used for all local rows)
3377 .  d_nnz - array containing the number of nonzeros in the various rows of the
3378            DIAGONAL portion of the local submatrix (possibly different for each row)
3379            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3380            The size of this array is equal to the number of local rows, i.e 'm'.
3381            For matrices that will be factored, you must leave room for (and set)
3382            the diagonal entry even if it is zero.
3383 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3384            submatrix (same value is used for all local rows).
3385 -  o_nnz - array containing the number of nonzeros in the various rows of the
3386            OFF-DIAGONAL portion of the local submatrix (possibly different for
3387            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3388            structure. The size of this array is equal to the number
3389            of local rows, i.e 'm'.
3390 
3391    If the *_nnz parameter is given then the *_nz parameter is ignored
3392 
3393    The AIJ format (also called the Yale sparse matrix format or
3394    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3395    storage.  The stored row and column indices begin with zero.
3396    See Users-Manual: ch_mat for details.
3397 
3398    The parallel matrix is partitioned such that the first m0 rows belong to
3399    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3400    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3401 
3402    The DIAGONAL portion of the local submatrix of a processor can be defined
3403    as the submatrix which is obtained by extracting the part corresponding to
3404    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3405    first row that belongs to the processor, r2 is the last row belonging to
3406    this processor, and c1-c2 is the range of indices of the local part of a
3407    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3408    common case of a square matrix, the row and column ranges are the same and
3409    the DIAGONAL part is also square. The remaining portion of the local
3410    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3411 
3412    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3413 
3414    You can call MatGetInfo() to get information on how effective the preallocation was;
3415    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3416    You can also run with the option -info and look for messages with the string
3417    malloc in them to see if additional memory allocation was needed.
3418 
3419    Example usage:
3420 
3421    Consider the following 8x8 matrix with 34 non-zero values, that is
3422    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3423    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3424    as follows:
3425 
3426 .vb
3427             1  2  0  |  0  3  0  |  0  4
3428     Proc0   0  5  6  |  7  0  0  |  8  0
3429             9  0 10  | 11  0  0  | 12  0
3430     -------------------------------------
3431            13  0 14  | 15 16 17  |  0  0
3432     Proc1   0 18  0  | 19 20 21  |  0  0
3433             0  0  0  | 22 23  0  | 24  0
3434     -------------------------------------
3435     Proc2  25 26 27  |  0  0 28  | 29  0
3436            30  0  0  | 31 32 33  |  0 34
3437 .ve
3438 
3439    This can be represented as a collection of submatrices as:
3440 
3441 .vb
3442       A B C
3443       D E F
3444       G H I
3445 .ve
3446 
3447    Where the submatrices A,B,C are owned by proc0, D,E,F are
3448    owned by proc1, G,H,I are owned by proc2.
3449 
3450    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3451    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3452    The 'M','N' parameters are 8,8, and have the same values on all procs.
3453 
3454    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3455    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3456    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3457    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3458    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3459    matrix, and [DF] as another SeqAIJ matrix.
3460 
3461    When d_nz, o_nz parameters are specified, d_nz storage elements are
3462    allocated for every row of the local diagonal submatrix, and o_nz
3463    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3464    One way to choose d_nz and o_nz is to use the max nonzeros per local
3465    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3466    In this case, the values of d_nz,o_nz are:
3467 .vb
3468      proc0 : dnz = 2, o_nz = 2
3469      proc1 : dnz = 3, o_nz = 2
3470      proc2 : dnz = 1, o_nz = 4
3471 .ve
3472    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3473    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3474    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3475    34 values.
3476 
3477    When d_nnz, o_nnz parameters are specified, the storage is specified
3478    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3479    In the above case the values for d_nnz,o_nnz are:
3480 .vb
3481      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3482      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3483      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3484 .ve
3485    Here the space allocated is the sum of all the above values, i.e. 34, and
3486    hence the preallocation is perfect.
3487 
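   As a sketch only (not taken from an actual PETSc example), the call made on proc0 of the 8x8
   example above, assuming A was created with MatCreate(), typed with MatSetType(A,MATMPIAIJ),
   and sized with MatSetSizes(A,3,3,8,8), would be
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
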
3488    Level: intermediate
3489 
3490 .keywords: matrix, aij, compressed row, sparse, parallel
3491 
3492 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3493           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3494 @*/
3495 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3496 {
3497   PetscErrorCode ierr;
3498 
3499   PetscFunctionBegin;
3500   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3501   PetscValidType(B,1);
3502   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3503   PetscFunctionReturn(0);
3504 }
3505 
3506 #undef __FUNCT__
3507 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3508 /*@
3509      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3510          CSR format the local rows.
3511 
3512    Collective on MPI_Comm
3513 
3514    Input Parameters:
3515 +  comm - MPI communicator
3516 .  m - number of local rows (Cannot be PETSC_DECIDE)
3517 .  n - This value should be the same as the local size used in creating the
3518        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3519        calculated if N is given) For square matrices n is almost always m.
3520 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3521 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3522 .   i - row indices
3523 .   j - column indices
3524 -   a - matrix values
3525 
3526    Output Parameter:
3527 .   mat - the matrix
3528 
3529    Level: intermediate
3530 
3531    Notes:
3532        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3533      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3534      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3535 
3536        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3537 
3538        The format which is used for the sparse matrix input is equivalent to a
3539     row-major ordering, i.e. for the following matrix, the input data expected is
3540     as shown:
3541 
3542 $        1 0 0
3543 $        2 0 3     P0
3544 $       -------
3545 $        4 5 6     P1
3546 $
3547 $     Process0 [P0]: rows_owned=[0,1]
3548 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3549 $        j =  {0,0,2}  [size = 3]
3550 $        v =  {1,2,3}  [size = 3]
3551 $
3552 $     Process1 [P1]: rows_owned=[2]
3553 $        i =  {0,3}    [size = nrow+1  = 1+1]
3554 $        j =  {0,1,2}  [size = 3]
3555 $        v =  {4,5,6}  [size = 3]
3556 
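     A minimal sketch of the corresponding call on process P0 of the example above (P1 passes its
     own arrays analogously; the variable names here are illustrative only):
$     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
$     PetscScalar v[] = {1,2,3};
$     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
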
3557 .keywords: matrix, aij, compressed row, sparse, parallel
3558 
3559 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3560           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3561 @*/
3562 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3563 {
3564   PetscErrorCode ierr;
3565 
3566   PetscFunctionBegin;
3567   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3568   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3569   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3570   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3571   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3572   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3573   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3574   PetscFunctionReturn(0);
3575 }
3576 
3577 #undef __FUNCT__
3578 #define __FUNCT__ "MatCreateAIJ"
3579 /*@C
3580    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3581    (the default parallel PETSc format).  For good matrix assembly performance
3582    the user should preallocate the matrix storage by setting the parameters
3583    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3584    performance can be increased by more than a factor of 50.
3585 
3586    Collective on MPI_Comm
3587 
3588    Input Parameters:
3589 +  comm - MPI communicator
3590 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3591            This value should be the same as the local size used in creating the
3592            y vector for the matrix-vector product y = Ax.
3593 .  n - This value should be the same as the local size used in creating the
3594        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3595        calculated if N is given) For square matrices n is almost always m.
3596 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3597 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3598 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3599            (same value is used for all local rows)
3600 .  d_nnz - array containing the number of nonzeros in the various rows of the
3601            DIAGONAL portion of the local submatrix (possibly different for each row)
3602            or NULL, if d_nz is used to specify the nonzero structure.
3603            The size of this array is equal to the number of local rows, i.e 'm'.
3604 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3605            submatrix (same value is used for all local rows).
3606 -  o_nnz - array containing the number of nonzeros in the various rows of the
3607            OFF-DIAGONAL portion of the local submatrix (possibly different for
3608            each row) or NULL, if o_nz is used to specify the nonzero
3609            structure. The size of this array is equal to the number
3610            of local rows, i.e 'm'.
3611 
3612    Output Parameter:
3613 .  A - the matrix
3614 
3615    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3616    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3617    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3618 
3619    Notes:
3620    If the *_nnz parameter is given then the *_nz parameter is ignored
3621 
3622    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3623    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3624    storage requirements for this matrix.
3625 
3626    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3627    processor then it must be used on all processors that share the object for
3628    that argument.
3629 
3630    The user MUST specify either the local or global matrix dimensions
3631    (possibly both).
3632 
3633    The parallel matrix is partitioned across processors such that the
3634    first m0 rows belong to process 0, the next m1 rows belong to
3635    process 1, the next m2 rows belong to process 2 etc.. where
3636    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3637    values corresponding to an [m x N] submatrix.
3638 
3639    The columns are logically partitioned with the n0 columns belonging
3640    to 0th partition, the next n1 columns belonging to the next
3641    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3642 
3643    The DIAGONAL portion of the local submatrix on any given processor
3644    is the submatrix corresponding to the rows and columns m,n
3645    owned by the given processor, i.e. the diagonal matrix on
3646    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
3647    etc. The remaining portion of the local submatrix [m x (N-n)]
3648    constitute the OFF-DIAGONAL portion. The example below better
3649    illustrates this concept.
3650 
3651    For a square global matrix we define each processor's diagonal portion
3652    to be its local rows and the corresponding columns (a square submatrix);
3653    each processor's off-diagonal portion encompasses the remainder of the
3654    local matrix (a rectangular submatrix).
3655 
3656    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3657 
3658    When calling this routine with a single process communicator, a matrix of
3659    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3660    type of communicator, use the construction mechanism:
3661      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3662 
3663    By default, this format uses inodes (identical nodes) when possible.
3664    We search for consecutive rows with the same nonzero structure, thereby
3665    reusing matrix information to achieve increased efficiency.
3666 
3667    Options Database Keys:
3668 +  -mat_no_inode  - Do not use inodes
3669 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3670 -  -mat_aij_oneindex - Internally use indexing starting at 1
3671         rather than 0.  Note that when calling MatSetValues(),
3672         the user still MUST index entries starting at 0!
3673 
3674 
3675    Example usage:
3676 
3677    Consider the following 8x8 matrix with 34 non-zero values, that is
3678    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3679    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3680    as follows:
3681 
3682 .vb
3683             1  2  0  |  0  3  0  |  0  4
3684     Proc0   0  5  6  |  7  0  0  |  8  0
3685             9  0 10  | 11  0  0  | 12  0
3686     -------------------------------------
3687            13  0 14  | 15 16 17  |  0  0
3688     Proc1   0 18  0  | 19 20 21  |  0  0
3689             0  0  0  | 22 23  0  | 24  0
3690     -------------------------------------
3691     Proc2  25 26 27  |  0  0 28  | 29  0
3692            30  0  0  | 31 32 33  |  0 34
3693 .ve
3694 
3695    This can be represented as a collection of submatrices as:
3696 
3697 .vb
3698       A B C
3699       D E F
3700       G H I
3701 .ve
3702 
3703    Where the submatrices A,B,C are owned by proc0, D,E,F are
3704    owned by proc1, G,H,I are owned by proc2.
3705 
3706    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3707    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3708    The 'M','N' parameters are 8,8, and have the same values on all procs.
3709 
3710    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3711    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3712    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3713    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3714    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3715    matrix, and [DF] as another SeqAIJ matrix.
3716 
3717    When d_nz, o_nz parameters are specified, d_nz storage elements are
3718    allocated for every row of the local diagonal submatrix, and o_nz
3719    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3720    One way to choose d_nz and o_nz is to use the max nonzeros per local
3721    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3722    In this case, the values of d_nz,o_nz are:
3723 .vb
3724      proc0 : dnz = 2, o_nz = 2
3725      proc1 : dnz = 3, o_nz = 2
3726      proc2 : dnz = 1, o_nz = 4
3727 .ve
3728    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3729    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3730    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3731    34 values.
3732 
3733    When d_nnz, o_nnz parameters are specified, the storage is specified
3734    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3735    In the above case the values for d_nnz,o_nnz are:
3736 .vb
3737      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3738      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3739      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3740 .ve
3741    Here the space allocated is the sum of all the above values, i.e. 34, and
3742    hence the preallocation is perfect.
3743 
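   A sketch of the call made on proc0 of the 8x8 example above (proc1 and proc2 make analogous
   calls with their own local sizes and nnz arrays):
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
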
3744    Level: intermediate
3745 
3746 .keywords: matrix, aij, compressed row, sparse, parallel
3747 
3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3749           MPIAIJ, MatCreateMPIAIJWithArrays()
3750 @*/
3751 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3752 {
3753   PetscErrorCode ierr;
3754   PetscMPIInt    size;
3755 
3756   PetscFunctionBegin;
3757   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3758   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3759   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3760   if (size > 1) {
3761     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3762     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3763   } else {
3764     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3765     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3766   }
3767   PetscFunctionReturn(0);
3768 }
3769 
3770 #undef __FUNCT__
3771 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3772 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3773 {
3774   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3775   PetscBool      flg;
3776   PetscErrorCode ierr;
3777 
3778   PetscFunctionBegin;
3779   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3780   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input");
3781   if (Ad)     *Ad     = a->A;
3782   if (Ao)     *Ao     = a->B;
3783   if (colmap) *colmap = a->garray;
3784   PetscFunctionReturn(0);
3785 }
3786 
3787 #undef __FUNCT__
3788 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3789 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3790 {
3791   PetscErrorCode ierr;
3792   PetscInt       i;
3793   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3794 
3795   PetscFunctionBegin;
3796   if (coloring->ctype == IS_COLORING_GLOBAL) {
3797     ISColoringValue *allcolors,*colors;
3798     ISColoring      ocoloring;
3799 
3800     /* set coloring for diagonal portion */
3801     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3802 
3803     /* set coloring for off-diagonal portion */
3804     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3805     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3806     for (i=0; i<a->B->cmap->n; i++) {
3807       colors[i] = allcolors[a->garray[i]];
3808     }
3809     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3810     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3811     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3812     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3813   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3814     ISColoringValue *colors;
3815     PetscInt        *larray;
3816     ISColoring      ocoloring;
3817 
3818     /* set coloring for diagonal portion */
3819     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3820     for (i=0; i<a->A->cmap->n; i++) {
3821       larray[i] = i + A->cmap->rstart;
3822     }
3823     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3824     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3825     for (i=0; i<a->A->cmap->n; i++) {
3826       colors[i] = coloring->colors[larray[i]];
3827     }
3828     ierr = PetscFree(larray);CHKERRQ(ierr);
3829     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3830     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3831     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3832 
3833     /* set coloring for off-diagonal portion */
3834     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3835     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3836     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3837     for (i=0; i<a->B->cmap->n; i++) {
3838       colors[i] = coloring->colors[larray[i]];
3839     }
3840     ierr = PetscFree(larray);CHKERRQ(ierr);
3841     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3842     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3843     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3844   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3845   PetscFunctionReturn(0);
3846 }
3847 
3848 #undef __FUNCT__
3849 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3850 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3851 {
3852   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3853   PetscErrorCode ierr;
3854 
3855   PetscFunctionBegin;
3856   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3857   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3858   PetscFunctionReturn(0);
3859 }
3860 
3861 #undef __FUNCT__
3862 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3863 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3864 {
3865   PetscErrorCode ierr;
3866   PetscInt       m,N,i,rstart,nnz,Ii;
3867   PetscInt       *indx;
3868   PetscScalar    *values;
3869 
3870   PetscFunctionBegin;
3871   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3872   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3873     PetscInt       *dnz,*onz,sum,bs,cbs;
3874 
3875     if (n == PETSC_DECIDE) {
3876       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3877     }
3878     /* Check sum(n) = N */
3879     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3880     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3881 
3882     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3883     rstart -= m;
3884 
3885     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3886     for (i=0; i<m; i++) {
3887       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3888       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3889       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3890     }
3891 
3892     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3893     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3894     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3895     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3896     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3897     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3898     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3899   }
3900 
3901   /* numeric phase */
3902   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3903   for (i=0; i<m; i++) {
3904     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3905     Ii   = i + rstart;
3906     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3907     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3908   }
3909   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3910   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 #undef __FUNCT__
3915 #define __FUNCT__ "MatFileSplit"
3916 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3917 {
3918   PetscErrorCode    ierr;
3919   PetscMPIInt       rank;
3920   PetscInt          m,N,i,rstart,nnz;
3921   size_t            len;
3922   const PetscInt    *indx;
3923   PetscViewer       out;
3924   char              *name;
3925   Mat               B;
3926   const PetscScalar *values;
3927 
3928   PetscFunctionBegin;
3929   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3930   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3931   /* Should this be the type of the diagonal block of A? */
3932   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3933   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3934   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3935   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3936   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3937   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3938   for (i=0; i<m; i++) {
3939     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3940     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3941     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3942   }
3943   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3944   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3945 
3946   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3947   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3948   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3949   sprintf(name,"%s.%d",outfile,rank);
3950   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3951   ierr = PetscFree(name);CHKERRQ(ierr);
3952   ierr = MatView(B,out);CHKERRQ(ierr);
3953   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3954   ierr = MatDestroy(&B);CHKERRQ(ierr);
3955   PetscFunctionReturn(0);
3956 }
3957 
3958 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3959 #undef __FUNCT__
3960 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3961 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3962 {
3963   PetscErrorCode      ierr;
3964   Mat_Merge_SeqsToMPI *merge;
3965   PetscContainer      container;
3966 
3967   PetscFunctionBegin;
3968   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3969   if (container) {
3970     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3971     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3972     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3973     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3974     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3975     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3976     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3977     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3978     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3979     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3980     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3981     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3982     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3983     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3984     ierr = PetscFree(merge);CHKERRQ(ierr);
3985     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3986   }
3987   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3988   PetscFunctionReturn(0);
3989 }
3990 
3991 #include <../src/mat/utils/freespace.h>
3992 #include <petscbt.h>
3993 
3994 #undef __FUNCT__
3995 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3996 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3997 {
3998   PetscErrorCode      ierr;
3999   MPI_Comm            comm;
4000   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4001   PetscMPIInt         size,rank,taga,*len_s;
4002   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4003   PetscInt            proc,m;
4004   PetscInt            **buf_ri,**buf_rj;
4005   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4006   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4007   MPI_Request         *s_waits,*r_waits;
4008   MPI_Status          *status;
4009   MatScalar           *aa=a->a;
4010   MatScalar           **abuf_r,*ba_i;
4011   Mat_Merge_SeqsToMPI *merge;
4012   PetscContainer      container;
4013 
4014   PetscFunctionBegin;
4015   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4016   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4017 
4018   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4019   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4020 
4021   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4022   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4023 
4024   bi     = merge->bi;
4025   bj     = merge->bj;
4026   buf_ri = merge->buf_ri;
4027   buf_rj = merge->buf_rj;
4028 
4029   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4030   owners = merge->rowmap->range;
4031   len_s  = merge->len_s;
4032 
4033   /* send and recv matrix values */
4034   /*-----------------------------*/
4035   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4036   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4037 
4038   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4039   for (proc=0,k=0; proc<size; proc++) {
4040     if (!len_s[proc]) continue;
4041     i    = owners[proc];
4042     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4043     k++;
4044   }
4045 
4046   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4047   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4048   ierr = PetscFree(status);CHKERRQ(ierr);
4049 
4050   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4051   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4052 
4053   /* insert mat values of mpimat */
4054   /*----------------------------*/
4055   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4056   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4057 
4058   for (k=0; k<merge->nrecv; k++) {
4059     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4060     nrows       = *(buf_ri_k[k]);
4061     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4062     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure of the k-th received i-structure */
4063   }
4064 
4065   /* set values of ba */
4066   m = merge->rowmap->n;
4067   for (i=0; i<m; i++) {
4068     arow = owners[rank] + i;
4069     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4070     bnzi = bi[i+1] - bi[i];
4071     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4072 
4073     /* add local non-zero vals of this proc's seqmat into ba */
4074     anzi   = ai[arow+1] - ai[arow];
4075     aj     = a->j + ai[arow];
4076     aa     = a->a + ai[arow];
4077     nextaj = 0;
4078     for (j=0; nextaj<anzi; j++) {
4079       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4080         ba_i[j] += aa[nextaj++];
4081       }
4082     }
4083 
4084     /* add received vals into ba */
4085     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4086       /* i-th row */
4087       if (i == *nextrow[k]) {
4088         anzi   = *(nextai[k]+1) - *nextai[k];
4089         aj     = buf_rj[k] + *(nextai[k]);
4090         aa     = abuf_r[k] + *(nextai[k]);
4091         nextaj = 0;
4092         for (j=0; nextaj<anzi; j++) {
4093           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4094             ba_i[j] += aa[nextaj++];
4095           }
4096         }
4097         nextrow[k]++; nextai[k]++;
4098       }
4099     }
4100     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4101   }
4102   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4103   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4104 
4105   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4106   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4107   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4108   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4109   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4110   PetscFunctionReturn(0);
4111 }
4112 
4113 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4114 
4115 #undef __FUNCT__
4116 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4117 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4118 {
4119   PetscErrorCode      ierr;
4120   Mat                 B_mpi;
4121   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4122   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4123   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4124   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4125   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4126   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4127   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4128   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4129   MPI_Status          *status;
4130   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4131   PetscBT             lnkbt;
4132   Mat_Merge_SeqsToMPI *merge;
4133   PetscContainer      container;
4134 
4135   PetscFunctionBegin;
4136   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4137 
4138   /* make sure it is a PETSc comm */
4139   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4140   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4141   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4142 
4143   ierr = PetscNew(&merge);CHKERRQ(ierr);
4144   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4145 
4146   /* determine row ownership */
4147   /*---------------------------------------------------------*/
4148   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4149   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4150   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4151   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4152   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4153   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4154   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4155 
4156   m      = merge->rowmap->n;
4157   owners = merge->rowmap->range;
4158 
4159   /* determine the number of messages to send, their lengths */
4160   /*---------------------------------------------------------*/
4161   len_s = merge->len_s;
4162 
4163   len          = 0; /* length of buf_si[] */
4164   merge->nsend = 0;
4165   for (proc=0; proc<size; proc++) {
4166     len_si[proc] = 0;
4167     if (proc == rank) {
4168       len_s[proc] = 0;
4169     } else {
4170       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4171       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4172     }
4173     if (len_s[proc]) {
4174       merge->nsend++;
4175       nrows = 0;
4176       for (i=owners[proc]; i<owners[proc+1]; i++) {
4177         if (ai[i+1] > ai[i]) nrows++;
4178       }
4179       len_si[proc] = 2*(nrows+1);
4180       len         += len_si[proc];
4181     }
4182   }
4183 
4184   /* determine the number and length of messages to receive for ij-structure */
4185   /*-------------------------------------------------------------------------*/
4186   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4187   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4188 
4189   /* post the Irecv of j-structure */
4190   /*-------------------------------*/
4191   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4192   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4193 
4194   /* post the Isend of j-structure */
4195   /*--------------------------------*/
4196   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4197 
4198   for (proc=0, k=0; proc<size; proc++) {
4199     if (!len_s[proc]) continue;
4200     i    = owners[proc];
4201     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4202     k++;
4203   }
4204 
4205   /* receives and sends of j-structure are complete */
4206   /*------------------------------------------------*/
4207   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4208   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4209 
4210   /* send and recv i-structure */
4211   /*---------------------------*/
4212   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4213   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4214 
4215   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4216   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4217   for (proc=0,k=0; proc<size; proc++) {
4218     if (!len_s[proc]) continue;
4219     /* form outgoing message for i-structure:
4220          buf_si[0]:                 nrows to be sent
4221                [1:nrows]:           row index (global)
4222                [1:nrows]:           row index (local to the receiving process)
4223     */
4224     /*-------------------------------------------*/
4225     nrows       = len_si[proc]/2 - 1;
4226     buf_si_i    = buf_si + nrows+1;
4227     buf_si[0]   = nrows;
4228     buf_si_i[0] = 0;
4229     nrows       = 0;
4230     for (i=owners[proc]; i<owners[proc+1]; i++) {
4231       anzi = ai[i+1] - ai[i];
4232       if (anzi) {
4233         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4234         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4235         nrows++;
4236       }
4237     }
4238     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4239     k++;
4240     buf_si += len_si[proc];
4241   }
4242 
4243   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4244   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4245 
4246   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4247   for (i=0; i<merge->nrecv; i++) {
4248     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4249   }
4250 
4251   ierr = PetscFree(len_si);CHKERRQ(ierr);
4252   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4253   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4254   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4255   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4256   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4257   ierr = PetscFree(status);CHKERRQ(ierr);
4258 
4259   /* compute a local seq matrix in each processor */
4260   /*----------------------------------------------*/
4261   /* allocate bi array and free space for accumulating nonzero column info */
4262   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4263   bi[0] = 0;
4264 
4265   /* create and initialize a linked list */
4266   nlnk = N+1;
4267   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4268 
4269   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4270   len  = ai[owners[rank+1]] - ai[owners[rank]];
4271   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4272 
4273   current_space = free_space;
4274 
4275   /* determine symbolic info for each local row */
4276   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4277 
4278   for (k=0; k<merge->nrecv; k++) {
4279     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4280     nrows       = *buf_ri_k[k];
4281     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4282     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure of the k-th received i-structure */
4283   }
4284 
4285   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4286   len  = 0;
4287   for (i=0; i<m; i++) {
4288     bnzi = 0;
4289     /* add local non-zero cols of this proc's seqmat into lnk */
4290     arow  = owners[rank] + i;
4291     anzi  = ai[arow+1] - ai[arow];
4292     aj    = a->j + ai[arow];
4293     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4294     bnzi += nlnk;
4295     /* add received col data into lnk */
4296     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4297       if (i == *nextrow[k]) { /* i-th row */
4298         anzi  = *(nextai[k]+1) - *nextai[k];
4299         aj    = buf_rj[k] + *nextai[k];
4300         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4301         bnzi += nlnk;
4302         nextrow[k]++; nextai[k]++;
4303       }
4304     }
4305     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4306 
4307     /* if free space is not available, make more free space */
4308     if (current_space->local_remaining<bnzi) {
4309       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4310       nspacedouble++;
4311     }
4312     /* copy data into free space, then initialize lnk */
4313     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4314     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4315 
4316     current_space->array           += bnzi;
4317     current_space->local_used      += bnzi;
4318     current_space->local_remaining -= bnzi;
4319 
4320     bi[i+1] = bi[i] + bnzi;
4321   }
4322 
4323   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4324 
4325   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4326   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4327   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4328 
4329   /* create symbolic parallel matrix B_mpi */
4330   /*---------------------------------------*/
4331   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4332   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4333   if (n==PETSC_DECIDE) {
4334     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4335   } else {
4336     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4337   }
4338   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4339   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4340   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4341   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4342   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4343 
4344   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4345   B_mpi->assembled    = PETSC_FALSE;
4346   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4347   merge->bi           = bi;
4348   merge->bj           = bj;
4349   merge->buf_ri       = buf_ri;
4350   merge->buf_rj       = buf_rj;
4351   merge->coi          = NULL;
4352   merge->coj          = NULL;
4353   merge->owners_co    = NULL;
4354 
4355   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4356 
4357   /* attach the supporting struct to B_mpi for reuse */
4358   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4359   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4360   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4361   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4362   *mpimat = B_mpi;
4363 
4364   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4365   PetscFunctionReturn(0);
4366 }
4367 
4368 #undef __FUNCT__
4369 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4370 /*@C
4371       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4372                  matrices from each processor
4373 
4374     Collective on MPI_Comm
4375 
4376    Input Parameters:
4377 +    comm - the communicator the parallel matrix will live on
4378 .    seqmat - the input sequential matrix on each process
4379 .    m - number of local rows (or PETSC_DECIDE)
4380 .    n - number of local columns (or PETSC_DECIDE)
4381 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4382 
4383    Output Parameter:
4384 .    mpimat - the parallel matrix generated
4385 
4386     Level: advanced
4387 
4388    Notes:
4389      The dimensions of the sequential matrix in each processor MUST be the same.
4390      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4391      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
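
   Example usage (a minimal sketch; seqA stands for the SeqAIJ matrix each process has
   assembled, all with the same global dimensions):
.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* ... update the numerical values in seqA, keeping the same nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve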
4392 @*/
4393 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4394 {
4395   PetscErrorCode ierr;
4396   PetscMPIInt    size;
4397 
4398   PetscFunctionBegin;
4399   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4400   if (size == 1) {
4401     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4402     if (scall == MAT_INITIAL_MATRIX) {
4403       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4404     } else {
4405       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4406     }
4407     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4408     PetscFunctionReturn(0);
4409   }
4410   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4411   if (scall == MAT_INITIAL_MATRIX) {
4412     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4413   }
4414   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4415   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4416   PetscFunctionReturn(0);
4417 }
4418 
4419 #undef __FUNCT__
4420 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4421 /*@
4422      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4423           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4424           with MatGetSize()
4425 
4426     Not Collective
4427 
4428    Input Parameters:
4429 +    A - the matrix
4430 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4431 
4432    Output Parameter:
4433 .    A_loc - the local sequential matrix generated
4434 
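   Example usage (a minimal sketch):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc; after the numerical values of A change, refresh it with ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
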
4435     Level: developer
4436 
4437 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4438 
4439 @*/
4440 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4441 {
4442   PetscErrorCode ierr;
4443   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4444   Mat_SeqAIJ     *mat,*a,*b;
4445   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4446   MatScalar      *aa,*ba,*cam;
4447   PetscScalar    *ca;
4448   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4449   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4450   PetscBool      match;
4451   MPI_Comm       comm;
4452   PetscMPIInt    size;
4453 
4454   PetscFunctionBegin;
4455   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4456   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4457   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4458   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4459   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4460 
4461   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4462   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4463   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4464   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4465   aa = a->a; ba = b->a;
4466   if (scall == MAT_INITIAL_MATRIX) {
4467     if (size == 1) {
4468       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4469       PetscFunctionReturn(0);
4470     }
4471 
4472     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4473     ci[0] = 0;
4474     for (i=0; i<am; i++) {
4475       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4476     }
4477     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4478     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4479     k    = 0;
4480     for (i=0; i<am; i++) {
4481       ncols_o = bi[i+1] - bi[i];
4482       ncols_d = ai[i+1] - ai[i];
4483       /* off-diagonal portion of A */
4484       for (jo=0; jo<ncols_o; jo++) {
4485         col = cmap[*bj];
4486         if (col >= cstart) break;
4487         cj[k]   = col; bj++;
4488         ca[k++] = *ba++;
4489       }
4490       /* diagonal portion of A */
4491       for (j=0; j<ncols_d; j++) {
4492         cj[k]   = cstart + *aj++;
4493         ca[k++] = *aa++;
4494       }
4495       /* off-diagonal portion of A */
4496       for (j=jo; j<ncols_o; j++) {
4497         cj[k]   = cmap[*bj++];
4498         ca[k++] = *ba++;
4499       }
4500     }
4501     /* put together the new matrix */
4502     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4503     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4504     /* Since these are PETSc arrays, change flags to free them as necessary. */
4505     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4506     mat->free_a  = PETSC_TRUE;
4507     mat->free_ij = PETSC_TRUE;
4508     mat->nonew   = 0;
4509   } else if (scall == MAT_REUSE_MATRIX) {
4510     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4511     ci = mat->i; cj = mat->j; cam = mat->a;
4512     for (i=0; i<am; i++) {
4513       /* off-diagonal portion of A */
4514       ncols_o = bi[i+1] - bi[i];
4515       for (jo=0; jo<ncols_o; jo++) {
4516         col = cmap[*bj];
4517         if (col >= cstart) break;
4518         *cam++ = *ba++; bj++;
4519       }
4520       /* diagonal portion of A */
4521       ncols_d = ai[i+1] - ai[i];
4522       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4523       /* off-diagonal portion of A */
4524       for (j=jo; j<ncols_o; j++) {
4525         *cam++ = *ba++; bj++;
4526       }
4527     }
4528   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4529   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4530   PetscFunctionReturn(0);
4531 }
4532 
4533 #undef __FUNCT__
4534 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4535 /*@C
4536      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4537 
4538     Not Collective
4539 
4540    Input Parameters:
4541 +    A - the matrix
4542 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4543 -    row, col - index sets of rows and columns to extract (or NULL)
4544 
4545    Output Parameter:
4546 .    A_loc - the local sequential matrix generated
4547 
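   Example usage (a minimal sketch; passing NULL for row and col extracts all local rows and
   all locally nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
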
4548     Level: developer
4549 
4550 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4551 
4552 @*/
4553 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4554 {
4555   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4556   PetscErrorCode ierr;
4557   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4558   IS             isrowa,iscola;
4559   Mat            *aloc;
4560   PetscBool      match;
4561 
4562   PetscFunctionBegin;
4563   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4564   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4565   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4566   if (!row) {
4567     start = A->rmap->rstart; end = A->rmap->rend;
4568     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4569   } else {
4570     isrowa = *row;
4571   }
4572   if (!col) {
4573     start = A->cmap->rstart;
4574     cmap  = a->garray;
4575     nzA   = a->A->cmap->n;
4576     nzB   = a->B->cmap->n;
4577     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4578     ncols = 0;
4579     for (i=0; i<nzB; i++) {
4580       if (cmap[i] < start) idx[ncols++] = cmap[i];
4581       else break;
4582     }
4583     imark = i;
4584     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4585     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4586     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4587   } else {
4588     iscola = *col;
4589   }
4590   if (scall != MAT_INITIAL_MATRIX) {
4591     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4592     aloc[0] = *A_loc;
4593   }
4594   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4595   *A_loc = aloc[0];
4596   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4597   if (!row) {
4598     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4599   }
4600   if (!col) {
4601     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4602   }
4603   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4604   PetscFunctionReturn(0);
4605 }
4606 
4607 #undef __FUNCT__
4608 #define __FUNCT__ "MatGetBrowsOfAcols"
4609 /*@C
4610     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4611 
4612     Collective on Mat
4613 
4614    Input Parameters:
4615 +    A,B - the matrices in mpiaij format
4616 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4617 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4618 
4619    Output Parameters:
4620 +    rowb, colb - index sets of rows and columns of B to extract
4621 -    B_seq - the sequential matrix generated
4622 
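   Example usage (a minimal sketch; the index sets returned by the first call are passed back
   unchanged when the matrix is reused):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... after the numerical values of B change (same nonzero pattern) ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
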
4623     Level: developer
4624 
4625 @*/
4626 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4627 {
4628   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4629   PetscErrorCode ierr;
4630   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4631   IS             isrowb,iscolb;
4632   Mat            *bseq=NULL;
4633 
4634   PetscFunctionBegin;
4635   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4636     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4637   }
4638   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4639 
4640   if (scall == MAT_INITIAL_MATRIX) {
4641     start = A->cmap->rstart;
4642     cmap  = a->garray;
4643     nzA   = a->A->cmap->n;
4644     nzB   = a->B->cmap->n;
4645     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4646     ncols = 0;
4647     for (i=0; i<nzB; i++) {  /* row < local row index */
4648       if (cmap[i] < start) idx[ncols++] = cmap[i];
4649       else break;
4650     }
4651     imark = i;
4652     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4653     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4654     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4655     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4656   } else {
4657     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4658     isrowb  = *rowb; iscolb = *colb;
4659     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4660     bseq[0] = *B_seq;
4661   }
4662   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4663   *B_seq = bseq[0];
4664   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4665   if (!rowb) {
4666     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4667   } else {
4668     *rowb = isrowb;
4669   }
4670   if (!colb) {
4671     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4672   } else {
4673     *colb = iscolb;
4674   }
4675   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4676   PetscFunctionReturn(0);
4677 }
4678 
4679 #undef __FUNCT__
4680 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4681 /*
4682     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4683     of the OFF-DIAGONAL portion of the local part of A
4684 
4685     Collective on Mat
4686 
4687    Input Parameters:
4688 +    A,B - the matrices in mpiaij format
4689 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4690 
4691    Output Parameters:
4692 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4693 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4694 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4695 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4696 
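    Usage pattern (a minimal sketch of how the MAT_REUSE_MATRIX arguments are threaded
    through between calls):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... later, after the numerical values of B change ... */
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
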
4697     Level: developer
4698 
4699 */
4700 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4701 {
4702   VecScatter_MPI_General *gen_to,*gen_from;
4703   PetscErrorCode         ierr;
4704   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4705   Mat_SeqAIJ             *b_oth;
4706   VecScatter             ctx =a->Mvctx;
4707   MPI_Comm               comm;
4708   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4709   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4710   PetscScalar            *rvalues,*svalues;
4711   MatScalar              *b_otha,*bufa,*bufA;
4712   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4713   MPI_Request            *rwaits = NULL,*swaits = NULL;
4714   MPI_Status             *sstatus,rstatus;
4715   PetscMPIInt            jj,size;
4716   PetscInt               *cols,sbs,rbs;
4717   PetscScalar            *vals;
4718 
4719   PetscFunctionBegin;
4720   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4721   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4722 
4723   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4724     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4725   }
4726   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4727   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4728 
4729   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4730   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4731   rvalues  = gen_from->values; /* holds the length of receiving row */
4732   svalues  = gen_to->values;   /* holds the length of sending row */
4733   nrecvs   = gen_from->n;
4734   nsends   = gen_to->n;
4735 
4736   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4737   srow    = gen_to->indices;    /* local row index to be sent */
4738   sstarts = gen_to->starts;
4739   sprocs  = gen_to->procs;
4740   sstatus = gen_to->sstatus;
4741   sbs     = gen_to->bs;
4742   rstarts = gen_from->starts;
4743   rprocs  = gen_from->procs;
4744   rbs     = gen_from->bs;
4745 
4746   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4747   if (scall == MAT_INITIAL_MATRIX) {
4748     /* i-array */
4749     /*---------*/
4750     /*  post receives */
4751     for (i=0; i<nrecvs; i++) {
4752       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4753       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4754       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4755     }
4756 
4757     /* pack the outgoing message */
4758     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4759 
4760     sstartsj[0] = 0;
4761     rstartsj[0] = 0;
4762     len         = 0; /* total length of j or a array to be sent */
4763     k           = 0;
4764     for (i=0; i<nsends; i++) {
4765       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4766       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4767       for (j=0; j<nrows; j++) {
4768         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4769         for (l=0; l<sbs; l++) {
4770           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4771 
4772           rowlen[j*sbs+l] = ncols;
4773 
4774           len += ncols;
4775           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4776         }
4777         k++;
4778       }
4779       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4780 
4781       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4782     }
4783     /* recvs and sends of i-array are completed */
4784     i = nrecvs;
4785     while (i--) {
4786       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4787     }
4788     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4789 
4790     /* allocate buffers for sending j and a arrays */
4791     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4792     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4793 
4794     /* create i-array of B_oth */
4795     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4796 
4797     b_othi[0] = 0;
4798     len       = 0; /* total length of j or a array to be received */
4799     k         = 0;
4800     for (i=0; i<nrecvs; i++) {
4801       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4802       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4803       for (j=0; j<nrows; j++) {
4804         b_othi[k+1] = b_othi[k] + rowlen[j];
4805         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4806         k++;
4807       }
4808       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4809     }
4810 
4811     /* allocate space for j and a arrays of B_oth */
4812     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4813     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4814 
4815     /* j-array */
4816     /*---------*/
4817     /*  post receives of j-array */
4818     for (i=0; i<nrecvs; i++) {
4819       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4820       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4821     }
4822 
4823     /* pack the outgoing message j-array */
4824     k = 0;
4825     for (i=0; i<nsends; i++) {
4826       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4827       bufJ  = bufj+sstartsj[i];
4828       for (j=0; j<nrows; j++) {
4829         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4830         for (ll=0; ll<sbs; ll++) {
4831           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4832           for (l=0; l<ncols; l++) {
4833             *bufJ++ = cols[l];
4834           }
4835           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4836         }
4837       }
4838       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4839     }
4840 
4841     /* recvs and sends of j-array are completed */
4842     i = nrecvs;
4843     while (i--) {
4844       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4845     }
4846     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4847   } else if (scall == MAT_REUSE_MATRIX) {
4848     sstartsj = *startsj_s;
4849     rstartsj = *startsj_r;
4850     bufa     = *bufa_ptr;
4851     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4852     b_otha   = b_oth->a;
4853   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4854 
4855   /* a-array */
4856   /*---------*/
4857   /*  post receives of a-array */
4858   for (i=0; i<nrecvs; i++) {
4859     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4860     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4861   }
4862 
4863   /* pack the outgoing message a-array */
4864   k = 0;
4865   for (i=0; i<nsends; i++) {
4866     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4867     bufA  = bufa+sstartsj[i];
4868     for (j=0; j<nrows; j++) {
4869       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4870       for (ll=0; ll<sbs; ll++) {
4871         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4872         for (l=0; l<ncols; l++) {
4873           *bufA++ = vals[l];
4874         }
4875         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4876       }
4877     }
4878     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4879   }
4880   /* recvs and sends of a-array are completed */
4881   i = nrecvs;
4882   while (i--) {
4883     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4884   }
4885   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4886   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4887 
4888   if (scall == MAT_INITIAL_MATRIX) {
4889     /* put together the new matrix */
4890     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4891 
4892     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4893     /* Since these are PETSc arrays, change flags to free them as necessary. */
4894     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4895     b_oth->free_a  = PETSC_TRUE;
4896     b_oth->free_ij = PETSC_TRUE;
4897     b_oth->nonew   = 0;
4898 
4899     ierr = PetscFree(bufj);CHKERRQ(ierr);
4900     if (!startsj_s || !bufa_ptr) {
4901       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4902       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4903     } else {
4904       *startsj_s = sstartsj;
4905       *startsj_r = rstartsj;
4906       *bufa_ptr  = bufa;
4907     }
4908   }
4909   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4910   PetscFunctionReturn(0);
4911 }
4912 
4913 #undef __FUNCT__
4914 #define __FUNCT__ "MatGetCommunicationStructs"
4915 /*@C
4916   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4917 
4918   Not Collective
4919 
4920   Input Parameters:
4921 . A - The matrix in mpiaij format
4922 
4923   Output Parameters:
4924 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4925 . colmap - A map from global column index to local index into lvec
4926 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4927 
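  Example usage (a minimal sketch for a PETSc configuration without PETSC_USE_CTABLE; with
  PETSC_USE_CTABLE the colmap argument is a PetscTable* instead):
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter Mvctx;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
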
4928   Level: developer
4929 
4930 @*/
4931 #if defined(PETSC_USE_CTABLE)
4932 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4933 #else
4934 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4935 #endif
4936 {
4937   Mat_MPIAIJ *a;
4938 
4939   PetscFunctionBegin;
4940   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4941   PetscValidPointer(lvec, 2);
4942   PetscValidPointer(colmap, 3);
4943   PetscValidPointer(multScatter, 4);
4944   a = (Mat_MPIAIJ*) A->data;
4945   if (lvec) *lvec = a->lvec;
4946   if (colmap) *colmap = a->colmap;
4947   if (multScatter) *multScatter = a->Mvctx;
4948   PetscFunctionReturn(0);
4949 }
4950 
4951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4954 #if defined(PETSC_HAVE_ELEMENTAL)
4955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4956 #endif
4957 
4958 #undef __FUNCT__
4959 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4960 /*
4961     Computes (B'*A')' since computing A*B directly is untenable
4962 
4963                n                       p                          p
4964         (              )       (              )         (                  )
4965       m (      A       )  *  n (       B      )   =   m (         C        )
4966         (              )       (              )         (                  )
4967 
4968 */
4969 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4970 {
4971   PetscErrorCode ierr;
4972   Mat            At,Bt,Ct;
4973 
4974   PetscFunctionBegin;
4975   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4976   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4977   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4978   ierr = MatDestroy(&At);CHKERRQ(ierr);
4979   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4980   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4981   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4982   PetscFunctionReturn(0);
4983 }
4984 
4985 #undef __FUNCT__
4986 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4987 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4988 {
4989   PetscErrorCode ierr;
4990   PetscInt       m=A->rmap->n,n=B->cmap->n;
4991   Mat            Cmat;
4992 
4993   PetscFunctionBegin;
4994   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4995   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4996   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4997   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4998   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4999   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5000   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5001   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5002 
5003   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5004 
5005   *C = Cmat;
5006   PetscFunctionReturn(0);
5007 }
5008 
5009 /* ----------------------------------------------------------------*/
5010 #undef __FUNCT__
5011 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5012 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5013 {
5014   PetscErrorCode ierr;
5015 
5016   PetscFunctionBegin;
5017   if (scall == MAT_INITIAL_MATRIX) {
5018     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5019     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5020     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5021   }
5022   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5023   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5024   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5025   PetscFunctionReturn(0);
5026 }
5027 
5028 /*MC
5029    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5030 
5031    Options Database Keys:
5032 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5033 
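   A minimal sketch of selecting this type from the options database:
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);   /* then run with -mat_type mpiaij */
.ve
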
5034   Level: beginner
5035 
5036 .seealso: MatCreateAIJ()
5037 M*/
5038 
5039 #undef __FUNCT__
5040 #define __FUNCT__ "MatCreate_MPIAIJ"
5041 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5042 {
5043   Mat_MPIAIJ     *b;
5044   PetscErrorCode ierr;
5045   PetscMPIInt    size;
5046 
5047   PetscFunctionBegin;
5048   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5049 
5050   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5051   B->data       = (void*)b;
5052   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5053   B->assembled  = PETSC_FALSE;
5054   B->insertmode = NOT_SET_VALUES;
5055   b->size       = size;
5056 
5057   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5058 
5059   /* build cache for off array entries formed */
5060   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5061 
5062   b->donotstash  = PETSC_FALSE;
5063   b->colmap      = 0;
5064   b->garray      = 0;
5065   b->roworiented = PETSC_TRUE;
5066 
5067   /* stuff used for matrix vector multiply */
5068   b->lvec  = NULL;
5069   b->Mvctx = NULL;
5070 
5071   /* stuff for MatGetRow() */
5072   b->rowindices   = 0;
5073   b->rowvalues    = 0;
5074   b->getrowactive = PETSC_FALSE;
5075 
5076   /* flexible pointer used in CUSP/CUSPARSE classes */
5077   b->spptr = NULL;
5078 
5079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5090 #if defined(PETSC_HAVE_ELEMENTAL)
5091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5092 #endif
5093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5097   PetscFunctionReturn(0);
5098 }
5099 
5100 #undef __FUNCT__
5101 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5102 /*@C
5103      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5104          and "off-diagonal" part of the matrix in CSR format.
5105 
5106    Collective on MPI_Comm
5107 
5108    Input Parameters:
5109 +  comm - MPI communicator
5110 .  m - number of local rows (Cannot be PETSC_DECIDE)
5111 .  n - This value should be the same as the local size used in creating the
5112        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5113        calculated if N is given). For square matrices n is almost always m.
5114 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5115 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5116 .   i - row indices for the "diagonal" portion of the matrix (i[0] must be 0)
5117 .   j - column indices of the "diagonal" entries, local to the diagonal block
5118 .   a - values of the "diagonal" entries
5119 .   oi - row indices for the "off-diagonal" portion of the matrix (oi[0] must be 0)
5120 .   oj - global column indices of the "off-diagonal" entries
5121 -   oa - values of the "off-diagonal" entries
5122 
5123    Output Parameter:
5124 .   mat - the matrix
5125 
5126    Level: advanced
5127 
5128    Notes:
5129        The i, j, a, oi, oj, and oa arrays are NOT copied by this routine into the internal format used by PETSc. The user
5130        remains responsible for freeing these arrays, but must not do so until the matrix has been destroyed.
5131 
5132        The i, j, oi, and oj indices are 0 based.
5133 
5134        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
5135 
5136        This routine sets only local rows; it cannot be used to set off-processor values.
5137 
5138        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5139        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5140        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5141        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5142        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5143        communication if it is known that only local entries will be set.
5144 
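       For reference, here is a minimal sketch of calling this routine. It is an illustrative
       example only (not taken from the PETSc documentation): it assumes exactly two MPI processes
       and assembles the 2 by 2 matrix [2 -1; -1 2], with one row and one column owned by each process.

.vb
       PetscErrorCode ierr;
       PetscMPIInt    rank;
       PetscInt       i[2]  = {0,1}, j[1]  = {0};
       PetscInt       oi[2] = {0,1}, oj[1];
       PetscScalar    a[1]  = {2.0}, oa[1] = {-1.0};
       Mat            A;

       ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       oj[0] = 1 - rank;
       ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                              i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
       Here each process contributes its single diagonal entry 2.0 (local column 0 of its diagonal
       block) and one off-diagonal entry -1.0 in the global column owned by the other process; the
       six arrays must remain valid until the matrix is destroyed.
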
5145 .keywords: matrix, aij, compressed row, sparse, parallel
5146 
5147 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5148           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5149 @*/
5150 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5151 {
5152   PetscErrorCode ierr;
5153   Mat_MPIAIJ     *maij;
5154 
5155   PetscFunctionBegin;
5156   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5157   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5158   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5159   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5160   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5161   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5162   maij = (Mat_MPIAIJ*) (*mat)->data;
5163 
5164   (*mat)->preallocated = PETSC_TRUE;
5165 
5166   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5167   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5168 
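  /* wrap the caller's arrays directly (no copy): A is the diagonal block addressed with local
     column indices, while B is created over the full global column space, so oj carries global
     column indices until the final assembly below compacts B's column space */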
5169   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5170   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5171 
5172   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5173   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5174   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5175   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5176 
5177   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5178   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5179   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5180   PetscFunctionReturn(0);
5181 }
5182 
5183 /*
5184     Special version of MatSetValues_MPIAIJ() intended for direct calls from Fortran
5185 */
5186 #include <petsc/private/fortranimpl.h>
5187 
5188 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5189 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5190 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5191 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5192 #endif
5193 
5194 /* Change these macros so they can be used in a void function; on error they now abort instead of returning an error code */
5195 #undef CHKERRQ
5196 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5197 #undef SETERRQ2
5198 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5199 #undef SETERRQ3
5200 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5201 #undef SETERRQ
5202 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5203 
5204 #undef __FUNCT__
5205 #define __FUNCT__ "matsetvaluesmpiaij_"
5206 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5207 {
5208   Mat            mat  = *mmat;
5209   PetscInt       m    = *mm, n = *mn;
5210   InsertMode     addv = *maddv;
5211   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5212   PetscScalar    value;
5213   PetscErrorCode ierr;
5214 
5215   MatCheckPreallocated(mat,1);
5216   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5217 
5218 #if defined(PETSC_USE_DEBUG)
5219   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5220 #endif
5221   {
5222     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5223     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5224     PetscBool roworiented = aij->roworiented;
5225 
5226     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5227     Mat        A                 = aij->A;
5228     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5229     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5230     MatScalar  *aa               = a->a;
5231     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5232     Mat        B                 = aij->B;
5233     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5234     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5235     MatScalar  *ba               = b->a;
5236 
5237     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5238     PetscInt  nonew = a->nonew;
5239     MatScalar *ap1,*ap2;
5240 
5241     PetscFunctionBegin;
5242     for (i=0; i<m; i++) {
5243       if (im[i] < 0) continue;
5244 #if defined(PETSC_USE_DEBUG)
5245       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5246 #endif
5247       if (im[i] >= rstart && im[i] < rend) {
5248         row      = im[i] - rstart;
5249         lastcol1 = -1;
5250         rp1      = aj + ai[row];
5251         ap1      = aa + ai[row];
5252         rmax1    = aimax[row];
5253         nrow1    = ailen[row];
5254         low1     = 0;
5255         high1    = nrow1;
5256         lastcol2 = -1;
5257         rp2      = bj + bi[row];
5258         ap2      = ba + bi[row];
5259         rmax2    = bimax[row];
5260         nrow2    = bilen[row];
5261         low2     = 0;
5262         high2    = nrow2;
5263 
5264         for (j=0; j<n; j++) {
5265           if (roworiented) value = v[i*n+j];
5266           else value = v[i+j*m];
5267           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5268           if (in[j] >= cstart && in[j] < cend) {
5269             col = in[j] - cstart;
5270             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5271           } else if (in[j] < 0) continue;
5272 #if defined(PETSC_USE_DEBUG)
5273           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5274 #endif
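          /* in[j] falls outside the local diagonal block, so the entry belongs to the
             off-diagonal part B; if the matrix was assembled before, map the global column
             through colmap, disassembling B when a new nonzero location is encountered */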
5275           else {
5276             if (mat->was_assembled) {
5277               if (!aij->colmap) {
5278                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5279               }
5280 #if defined(PETSC_USE_CTABLE)
5281               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5282               col--;
5283 #else
5284               col = aij->colmap[in[j]] - 1;
5285 #endif
5286               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5287                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5288                 col  =  in[j];
5289                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5290                 B     = aij->B;
5291                 b     = (Mat_SeqAIJ*)B->data;
5292                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5293                 rp2   = bj + bi[row];
5294                 ap2   = ba + bi[row];
5295                 rmax2 = bimax[row];
5296                 nrow2 = bilen[row];
5297                 low2  = 0;
5298                 high2 = nrow2;
5299                 bm    = aij->B->rmap->n;
5300                 ba    = b->a;
5301               }
5302             } else col = in[j];
5303             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5304           }
5305         }
5306       } else if (!aij->donotstash) {
5307         if (roworiented) {
5308           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5309         } else {
5310           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5311         }
5312       }
5313     }
5314   }
5315   PetscFunctionReturnVoid();
5316 }
5317 
5318