xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e37c518b3c178882b922d1d3faeb3ee252cb498a)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes.  It is recommended that you call both of
15    the above preallocation routines for simplicity; a short usage sketch follows this manual page.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
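
/*
   Illustrative sketch (not called anywhere in this file): builds a small AIJ matrix the way the
   manual page above recommends, calling both preallocation routines so the same code works with
   one or many processes.  The helper name ExampleCreateAIJ and the nonzero estimates (5 per
   diagonal-block row, 2 per off-diagonal-block row) are made up for illustration; everything used
   here is declared by the headers included at the top of this file.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCreateAIJ"
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  /* call both preallocation routines; the one that does not match the actual type is a no-op */
  ierr = MatSeqAIJSetPreallocation(*A,5,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr);
  /* insert a simple diagonal so the matrix can be assembled */
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(*A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}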
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity; a short sketch of selecting the type at run time follows this manual page.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
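
/*
   Illustrative sketch (not called anywhere in this file): shows how the -mat_type aijcrl option
   database key above takes effect, namely through MatSetFromOptions().  The helper name
   ExampleCreateFromOptions and the preallocation estimates are made up for illustration; since the
   CRL types are AIJ subclasses, the usual AIJ preallocation routines still apply afterwards.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCreateFromOptions"
static PetscErrorCode ExampleCreateFromOptions(MPI_Comm comm,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);      /* default type; -mat_type aijcrl overrides it */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);      /* reads -mat_type from the options database */
  /* preallocate for whichever AIJ-based type was selected at run time */
  ierr = MatSeqAIJSetPreallocation(*A,5,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}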
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN; an illustrative usage sketch follows this routine.
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to the other processes */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal nonzeros in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal nonzeros in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
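
/*
   Illustrative sketch (not called anywhere in this file) of driving MatDistribute_MPIAIJ() above:
   a square SeqAIJ matrix whose values live on process 0 is spread across the communicator.  The
   helper name ExampleDistribute is made up; gseq is assumed to be a valid SeqAIJ matrix on every
   process (only the values on process 0 are used), and the local row count is computed with
   PetscSplitOwnership() because the routine requires an explicit local size rather than PETSC_DECIDE.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleDistribute"
static PetscErrorCode ExampleDistribute(MPI_Comm comm,Mat gseq,Mat *dist)
{
  PetscErrorCode ierr;
  PetscInt       m = PETSC_DECIDE,N;

  PetscFunctionBegin;
  ierr = MatGetSize(gseq,&N,NULL);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&m,&N);CHKERRQ(ierr);             /* pick local row counts that sum to N */
  ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,dist);CHKERRQ(ierr);
  /* the same call with MAT_REUSE_MATRIX refreshes only the numerical values; it is shown here
     immediately just to illustrate the calling sequence */
  ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_REUSE_MATRIX,dist);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}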
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash-table cost; without it, it is not scalable (each process
402 stores an order-N integer array) but is fast to access. An illustrative lookup sketch follows the routine below.
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
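
/*
   Illustrative sketch (not called anywhere in this file) of how the colmap built above is used:
   translate a global column index gcol into the corresponding local column index of the
   off-diagonal block B, following the same shift-by-one convention that MatSetValues_MPIAIJ()
   below relies on.  The helper name ExampleGlobalToLocalColumn is made up for illustration.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleGlobalToLocalColumn"
static PetscErrorCode ExampleGlobalToLocalColumn(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!aij->colmap) {ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);}
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);   /* keys and values are stored shifted by one */
  (*lcol)--;
#else
  *lcol = aij->colmap[gcol] - 1;                                  /* the flat array uses the same shift-by-one convention */
#endif
  /* *lcol is now the column index into aij->B, or negative if gcol does not appear in the off-diagonal part */
  PetscFunctionReturn(0);
}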
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *owners = A->rmap->range;
787   PetscInt       n      = A->rmap->n;
788   PetscSF        sf;
789   PetscInt      *lrows;
790   PetscSFNode   *rrows;
791   PetscInt       r, p = 0, len = 0;
792   PetscErrorCode ierr;
793 
794   PetscFunctionBegin;
795   /* Create SF where leaves are input rows and roots are owned rows */
796   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
797   for (r = 0; r < n; ++r) lrows[r] = -1;
798   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
799   for (r = 0; r < N; ++r) {
800     const PetscInt idx   = rows[r];
801     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
802     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
803       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
804     }
805     if (A->nooffproczerorows) {
806       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
807       lrows[len++] = idx - owners[p];
808     } else {
809       rrows[r].rank = p;
810       rrows[r].index = rows[r] - owners[p];
811     }
812   }
813   if (!A->nooffproczerorows) {
814     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
815     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
816     /* Collect flags for rows to be zeroed */
817     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
818     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
819     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
820     /* Compress and put in row numbers */
821     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
822   }
823   /* fix right hand side if needed */
824   if (x && b) {
825     const PetscScalar *xx;
826     PetscScalar       *bb;
827 
828     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
829     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
830     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
831     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
833   }
834   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
835   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
836   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
837     PetscBool cong;
838     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
839     if (cong) A->congruentlayouts = 1;
840     else      A->congruentlayouts = 0;
841   }
842   if ((diag != 0.0) && A->congruentlayouts) {
843     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
844   } else if (diag != 0.0) {
845     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
846     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
847     for (r = 0; r < len; ++r) {
848       const PetscInt row = lrows[r] + A->rmap->rstart;
849       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
850     }
851     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
852     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853   } else {
854     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
855   }
856   ierr = PetscFree(lrows);CHKERRQ(ierr);
857 
858   /* only change matrix nonzero state if pattern was allowed to be changed */
859   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
860     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
861     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
862   }
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
868 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
869 {
870   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       n = A->rmap->n;
873   PetscInt          i,j,r,m,p = 0,len = 0;
874   PetscInt          *lrows,*owners = A->rmap->range;
875   PetscSFNode       *rrows;
876   PetscSF           sf;
877   const PetscScalar *xx;
878   PetscScalar       *bb,*mask;
879   Vec               xmask,lmask;
880   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
881   const PetscInt    *aj, *ii,*ridx;
882   PetscScalar       *aa;
883 
884   PetscFunctionBegin;
885   /* Create SF where leaves are input rows and roots are owned rows */
886   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
887   for (r = 0; r < n; ++r) lrows[r] = -1;
888   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
889   for (r = 0; r < N; ++r) {
890     const PetscInt idx   = rows[r];
891     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
892     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
893       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
894     }
895     rrows[r].rank  = p;
896     rrows[r].index = rows[r] - owners[p];
897   }
898   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
899   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
900   /* Collect flags for rows to be zeroed */
901   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
902   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
904   /* Compress and put in row numbers */
905   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
906   /* zero diagonal part of matrix */
907   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
908   /* handle off diagonal part of matrix */
909   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
910   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
911   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
912   for (i=0; i<len; i++) bb[lrows[i]] = 1;
913   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
914   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
917   if (x) {
918     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
922   }
923   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
924   /* remove zeroed rows of off diagonal matrix */
925   ii = aij->i;
926   for (i=0; i<len; i++) {
927     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
928   }
929   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
930   if (aij->compressedrow.use) {
931     m    = aij->compressedrow.nrows;
932     ii   = aij->compressedrow.i;
933     ridx = aij->compressedrow.rindex;
934     for (i=0; i<m; i++) {
935       n  = ii[i+1] - ii[i];
936       aj = aij->j + ii[i];
937       aa = aij->a + ii[i];
938 
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           if (b) bb[*ridx] -= *aa*xx[*aj];
942           *aa = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947       ridx++;
948     }
949   } else { /* do not use compressed row format */
950     m = l->B->rmap->n;
951     for (i=0; i<m; i++) {
952       n  = ii[i+1] - ii[i];
953       aj = aij->j + ii[i];
954       aa = aij->a + ii[i];
955       for (j=0; j<n; j++) {
956         if (PetscAbsScalar(mask[*aj])) {
957           if (b) bb[i] -= *aa*xx[*aj];
958           *aa = 0.0;
959         }
960         aa++;
961         aj++;
962       }
963     }
964   }
965   if (x) {
966     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
967     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
968   }
969   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
970   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
971   ierr = PetscFree(lrows);CHKERRQ(ierr);
972 
973   /* only change matrix nonzero state if pattern was allowed to be changed */
974   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
975     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
976     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMult_MPIAIJ"
983 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987   PetscInt       nt;
988 
989   PetscFunctionBegin;
990   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
991   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
992   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
994   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
995   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
996   PetscFunctionReturn(0);
997 }
998 
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1001 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1002 {
1003   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1013 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1014 {
1015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1016   PetscErrorCode ierr;
1017 
1018   PetscFunctionBegin;
1019   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1020   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1021   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1022   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1028 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1029 {
1030   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1031   PetscErrorCode ierr;
1032   PetscBool      merged;
1033 
1034   PetscFunctionBegin;
1035   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1036   /* do nondiagonal part */
1037   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1038   if (!merged) {
1039     /* send it on its way */
1040     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1041     /* do local part */
1042     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1043     /* receive remote parts: note this assumes the values are not actually */
1044     /* added in yy until the next line */
1045     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1046   } else {
1047     /* do local part */
1048     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1049     /* send it on its way */
1050     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051     /* values actually were received in the Begin() but we need to call this nop */
1052     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1053   }
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1059 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1060 {
1061   MPI_Comm       comm;
1062   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1063   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1064   IS             Me,Notme;
1065   PetscErrorCode ierr;
1066   PetscInt       M,N,first,last,*notme,i;
1067   PetscMPIInt    size;
1068 
1069   PetscFunctionBegin;
1070   /* Easy test: symmetric diagonal block */
1071   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1072   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1073   if (!*f) PetscFunctionReturn(0);
1074   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1075   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1076   if (size == 1) PetscFunctionReturn(0);
1077 
1078   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1079   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1080   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1081   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1082   for (i=0; i<first; i++) notme[i] = i;
1083   for (i=last; i<M; i++) notme[i-last+first] = i;
1084   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1085   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1086   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1087   Aoff = Aoffs[0];
1088   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1089   Boff = Boffs[0];
1090   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1091   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1093   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1095   ierr = PetscFree(notme);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1101 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   /* do nondiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1109   /* send it on its way */
1110   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1111   /* do local part */
1112   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   /* receive remote parts */
1114   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*
1119   This only works correctly for square matrices where the subblock A->A is the
1120    diagonal block
1121 */
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1124 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1125 {
1126   PetscErrorCode ierr;
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128 
1129   PetscFunctionBegin;
1130   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1131   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1132   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatScale_MPIAIJ"
1138 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1139 {
1140   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1141   PetscErrorCode ierr;
1142 
1143   PetscFunctionBegin;
1144   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1145   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatDestroy_MPIAIJ"
1151 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1152 {
1153   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1154   PetscErrorCode ierr;
1155 
1156   PetscFunctionBegin;
1157 #if defined(PETSC_USE_LOG)
1158   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1159 #endif
1160   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1161   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1162   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1164 #if defined(PETSC_USE_CTABLE)
1165   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1166 #else
1167   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1168 #endif
1169   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1170   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1171   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1172   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1173   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1174   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1175 
1176   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1185 #if defined(PETSC_HAVE_ELEMENTAL)
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1187 #endif
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1193 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1194 {
1195   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1196   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1197   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1198   PetscErrorCode ierr;
1199   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1200   int            fd;
1201   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1202   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1203   PetscScalar    *column_values;
1204   PetscInt       message_count,flowcontrolcount;
1205   FILE           *file;
1206 
1207   PetscFunctionBegin;
1208   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1209   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1210   nz   = A->nz + B->nz;
1211   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1212   if (!rank) {
1213     header[0] = MAT_FILE_CLASSID;
1214     header[1] = mat->rmap->N;
1215     header[2] = mat->cmap->N;
1216 
1217     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1218     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1219     /* get largest number of rows any processor has */
1220     rlen  = mat->rmap->n;
1221     range = mat->rmap->range;
1222     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1223   } else {
1224     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225     rlen = mat->rmap->n;
1226   }
1227 
1228   /* load up the local row counts */
1229   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1230   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1231 
1232   /* store the row lengths to the file */
1233   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1234   if (!rank) {
1235     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1236     for (i=1; i<size; i++) {
1237       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1238       rlen = range[i+1] - range[i];
1239       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1241     }
1242     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1243   } else {
1244     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1245     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1246     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1247   }
1248   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1249 
1250   /* load up the local column indices */
1251   nzmax = nz; /* this processor needs space for what the largest processor needs */
1252   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1254   cnt   = 0;
1255   for (i=0; i<mat->rmap->n; i++) {
1256     for (j=B->i[i]; j<B->i[i+1]; j++) {
1257       if ((col = garray[B->j[j]]) > cstart) break;
1258       column_indices[cnt++] = col;
1259     }
1260     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1261     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1262   }
1263   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1264 
1265   /* store the column indices to the file */
1266   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1267   if (!rank) {
1268     MPI_Status status;
1269     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     for (i=1; i<size; i++) {
1271       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1272       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1273       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1274       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1278   } else {
1279     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1280     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1283   }
1284   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1285 
1286   /* load up the local column values */
1287   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1288   cnt  = 0;
1289   for (i=0; i<mat->rmap->n; i++) {
1290     for (j=B->i[i]; j<B->i[i+1]; j++) {
1291       if (garray[B->j[j]] > cstart) break;
1292       column_values[cnt++] = B->a[j];
1293     }
1294     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1295     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1296   }
1297   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1298 
1299   /* store the column values to the file */
1300   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1301   if (!rank) {
1302     MPI_Status status;
1303     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1304     for (i=1; i<size; i++) {
1305       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1306       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1307       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1308       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1310     }
1311     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1312   } else {
1313     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1314     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1317   }
1318   ierr = PetscFree(column_values);CHKERRQ(ierr);
1319 
1320   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1321   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1322   PetscFunctionReturn(0);
1323 }
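
/*
   A note on the file layout produced above: the standard PETSc binary format for a matrix is a
   4-entry header (MAT_FILE_CLASSID, global rows, global cols, total nonzeros), followed by all
   row lengths, then all global column indices, then all values, each stored in global row order.
   A minimal usage sketch (the file name "amat.dat" is arbitrary):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"amat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);     (dispatches to MatView_MPIAIJ_Binary() for a parallel MATMPIAIJ)
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The file can later be read back on any number of processes with MatLoad().
*/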
1324 
1325 #include <petscdraw.h>
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1328 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1329 {
1330   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1331   PetscErrorCode    ierr;
1332   PetscMPIInt       rank = aij->rank,size = aij->size;
1333   PetscBool         isdraw,iascii,isbinary;
1334   PetscViewer       sviewer;
1335   PetscViewerFormat format;
1336 
1337   PetscFunctionBegin;
1338   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1341   if (iascii) {
1342     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1343     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1344       MatInfo   info;
1345       PetscBool inodes;
1346 
1347       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1348       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1349       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1351       if (!inodes) {
1352         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1353                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1354       } else {
1355         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1356                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1357       }
1358       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1359       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1360       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1361       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1362       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1363       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1365       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1366       PetscFunctionReturn(0);
1367     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1368       PetscInt inodecount,inodelimit,*inodes;
1369       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1370       if (inodes) {
1371         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1372       } else {
1373         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1374       }
1375       PetscFunctionReturn(0);
1376     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1377       PetscFunctionReturn(0);
1378     }
1379   } else if (isbinary) {
1380     if (size == 1) {
1381       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1382       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1383     } else {
1384       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1385     }
1386     PetscFunctionReturn(0);
1387   } else if (isdraw) {
1388     PetscDraw draw;
1389     PetscBool isnull;
1390     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1391     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1392     if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Everyone has to make these viewer calls to draw the matrix since the graphics waits are
1444        synchronized across all processors that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1452     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1453     ierr = MatDestroy(&A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
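
/*
   Typical use of the routine above is through MatView() with a viewer taken from the options
   database; the usual command line options (see the PetscViewer documentation for the full list) are

     -mat_view ::ascii_info          brief summary (the PETSC_VIEWER_ASCII_INFO branch above)
     -mat_view ::ascii_info_detail   per-process breakdown (the PETSC_VIEWER_ASCII_INFO_DETAIL branch)
     -mat_view draw                  nonzero structure drawn in an X window
*/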
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ"
1460 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1461 {
1462   PetscErrorCode ierr;
1463   PetscBool      iascii,isdraw,issocket,isbinary;
1464 
1465   PetscFunctionBegin;
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1470   if (iascii || isdraw || isbinary || issocket) {
1471     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatSOR_MPIAIJ"
1478 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscErrorCode ierr;
1482   Vec            bb1 = 0;
1483   PetscBool      hasop;
1484 
1485   PetscFunctionBegin;
1486   if (flag == SOR_APPLY_UPPER) {
1487     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488     PetscFunctionReturn(0);
1489   }
1490 
1491   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1492     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1493   }
1494 
1495   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500 
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1513     if (flag & SOR_ZERO_INITIAL_GUESS) {
1514       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1515       its--;
1516     }
1517     while (its--) {
1518       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520 
1521       /* update rhs: bb1 = bb - B*x */
1522       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1523       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1524 
1525       /* local sweep */
1526       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1527     }
1528   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1529     if (flag & SOR_ZERO_INITIAL_GUESS) {
1530       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1531       its--;
1532     }
1533     while (its--) {
1534       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536 
1537       /* update rhs: bb1 = bb - B*x */
1538       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1539       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1540 
1541       /* local sweep */
1542       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1543     }
1544   } else if (flag & SOR_EISENSTAT) {
1545     Vec xx1;
1546 
1547     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1548     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1549 
1550     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552     if (!mat->diag) {
1553       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1554       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1555     }
1556     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1557     if (hasop) {
1558       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1559     } else {
1560       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1561     }
1562     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1563 
1564     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1565 
1566     /* local sweep */
1567     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1568     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1569     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1570   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1571 
1572   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1573 
1574   matin->errortype = mat->A->errortype;
1575   PetscFunctionReturn(0);
1576 }
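
/*
   The SOR above only performs process-local sweeps: the ghost values gathered into mat->lvec supply
   the off-process contribution through the rhs update bb1 = bb - B*x, so the global iteration is a
   block Jacobi outer iteration with SOR inside each block.  A sketch of how it is usually reached
   (option names as documented for PCSOR):

     -ksp_type richardson -pc_type sor -pc_sor_local_symmetric
*/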
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatPermute_MPIAIJ"
1580 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1581 {
1582   Mat            aA,aB,Aperm;
1583   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1584   PetscScalar    *aa,*ba;
1585   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1586   PetscSF        rowsf,sf;
1587   IS             parcolp = NULL;
1588   PetscBool      done;
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1593   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1594   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1595   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1596 
1597   /* Invert row permutation to find out where my rows should go */
1598   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1599   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1600   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1601   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1602   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1603   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1604 
1605   /* Invert column permutation to find out where my columns should go */
1606   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1607   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1608   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1609   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1610   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1611   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1612   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1613 
1614   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1615   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1616   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1617 
1618   /* Find out where my gcols should go */
1619   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1620   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1621   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1622   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1623   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1624   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1625   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1626   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1627 
1628   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1629   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1630   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1631   for (i=0; i<m; i++) {
1632     PetscInt row = rdest[i],rowner;
1633     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1634     for (j=ai[i]; j<ai[i+1]; j++) {
1635       PetscInt cowner,col = cdest[aj[j]];
1636       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1637       if (rowner == cowner) dnnz[i]++;
1638       else onnz[i]++;
1639     }
1640     for (j=bi[i]; j<bi[i+1]; j++) {
1641       PetscInt cowner,col = gcdest[bj[j]];
1642       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1643       if (rowner == cowner) dnnz[i]++;
1644       else onnz[i]++;
1645     }
1646   }
1647   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1648   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1649   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1650   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1651   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1652 
1653   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1654   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1655   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1656   for (i=0; i<m; i++) {
1657     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1658     PetscInt j0,rowlen;
1659     rowlen = ai[i+1] - ai[i];
1660     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so set the values in batches of at most m */
1661       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1662       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1663     }
1664     rowlen = bi[i+1] - bi[i];
1665     for (j0=j=0; j<rowlen; j0=j) {
1666       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1667       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1668     }
1669   }
1670   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1673   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1674   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1675   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1676   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1677   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1678   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1679   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1680   *B = Aperm;
1681   PetscFunctionReturn(0);
1682 }
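
/*
   A short calling sketch for the permutation routine above (rowp and colp are parallel index sets
   describing the row and column permutations, with the meaning documented for MatPermute(); the
   permuted matrix is returned as a new matrix):

     IS  rowp,colp;
     Mat B;
     ... create rowp and colp, e.g. from MatGetOrdering() ...
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/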
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1686 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1687 {
1688   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1689   PetscErrorCode ierr;
1690 
1691   PetscFunctionBegin;
1692   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1693   if (ghosts) *ghosts = aij->garray;
1694   PetscFunctionReturn(0);
1695 }
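
/*
   The "ghosts" returned above are the global column numbers of the off-diagonal block B,
   i.e. exactly the entries of aij->garray.  Typical call:

     PetscInt       nghost;
     const PetscInt *ghost;
     ierr = MatGetGhosts(mat,&nghost,&ghost);CHKERRQ(ierr);
*/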
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1699 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1700 {
1701   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1702   Mat            A    = mat->A,B = mat->B;
1703   PetscErrorCode ierr;
1704   PetscReal      isend[5],irecv[5];
1705 
1706   PetscFunctionBegin;
1707   info->block_size = 1.0;
1708   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1709 
1710   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1711   isend[3] = info->memory;  isend[4] = info->mallocs;
1712 
1713   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1714 
1715   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1716   isend[3] += info->memory;  isend[4] += info->mallocs;
1717   if (flag == MAT_LOCAL) {
1718     info->nz_used      = isend[0];
1719     info->nz_allocated = isend[1];
1720     info->nz_unneeded  = isend[2];
1721     info->memory       = isend[3];
1722     info->mallocs      = isend[4];
1723   } else if (flag == MAT_GLOBAL_MAX) {
1724     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1725 
1726     info->nz_used      = irecv[0];
1727     info->nz_allocated = irecv[1];
1728     info->nz_unneeded  = irecv[2];
1729     info->memory       = irecv[3];
1730     info->mallocs      = irecv[4];
1731   } else if (flag == MAT_GLOBAL_SUM) {
1732     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1733 
1734     info->nz_used      = irecv[0];
1735     info->nz_allocated = irecv[1];
1736     info->nz_unneeded  = irecv[2];
1737     info->memory       = irecv[3];
1738     info->mallocs      = irecv[4];
1739   }
1740   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1741   info->fill_ratio_needed = 0;
1742   info->factor_mallocs    = 0;
1743   PetscFunctionReturn(0);
1744 }
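
/*
   Calling sketch for the routine above; the flag selects the local numbers or a max/sum reduction
   over the matrix communicator:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/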
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatSetOption_MPIAIJ"
1748 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1749 {
1750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1751   PetscErrorCode ierr;
1752 
1753   PetscFunctionBegin;
1754   switch (op) {
1755   case MAT_NEW_NONZERO_LOCATIONS:
1756   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1757   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1758   case MAT_KEEP_NONZERO_PATTERN:
1759   case MAT_NEW_NONZERO_LOCATION_ERR:
1760   case MAT_USE_INODES:
1761   case MAT_IGNORE_ZERO_ENTRIES:
1762     MatCheckPreallocated(A,1);
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1765     break;
1766   case MAT_ROW_ORIENTED:
1767     MatCheckPreallocated(A,1);
1768     a->roworiented = flg;
1769 
1770     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1771     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1772     break;
1773   case MAT_NEW_DIAGONALS:
1774     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1775     break;
1776   case MAT_IGNORE_OFF_PROC_ENTRIES:
1777     a->donotstash = flg;
1778     break;
1779   case MAT_SPD:
1780     A->spd_set = PETSC_TRUE;
1781     A->spd     = flg;
1782     if (flg) {
1783       A->symmetric                  = PETSC_TRUE;
1784       A->structurally_symmetric     = PETSC_TRUE;
1785       A->symmetric_set              = PETSC_TRUE;
1786       A->structurally_symmetric_set = PETSC_TRUE;
1787     }
1788     break;
1789   case MAT_SYMMETRIC:
1790     MatCheckPreallocated(A,1);
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     break;
1793   case MAT_STRUCTURALLY_SYMMETRIC:
1794     MatCheckPreallocated(A,1);
1795     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1796     break;
1797   case MAT_HERMITIAN:
1798     MatCheckPreallocated(A,1);
1799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1800     break;
1801   case MAT_SYMMETRY_ETERNAL:
1802     MatCheckPreallocated(A,1);
1803     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1804     break;
1805   default:
1806     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1807   }
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatGetRow_MPIAIJ"
1813 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1814 {
1815   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1816   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1817   PetscErrorCode ierr;
1818   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1819   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1820   PetscInt       *cmap,*idx_p;
1821 
1822   PetscFunctionBegin;
1823   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1824   mat->getrowactive = PETSC_TRUE;
1825 
1826   if (!mat->rowvalues && (idx || v)) {
1827     /*
1828         allocate enough space to hold information from the longest row.
1829     */
1830     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1831     PetscInt   max = 1,tmp;
1832     for (i=0; i<matin->rmap->n; i++) {
1833       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1834       if (max < tmp) max = tmp;
1835     }
1836     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1837   }
1838 
1839   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1840   lrow = row - rstart;
1841 
1842   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1843   if (!v)   {pvA = 0; pvB = 0;}
1844   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1845   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1846   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1847   nztot = nzA + nzB;
1848 
1849   cmap = mat->garray;
1850   if (v  || idx) {
1851     if (nztot) {
1852       /* Sort by increasing column numbers, assuming A and B already sorted */
1853       PetscInt imark = -1;
1854       if (v) {
1855         *v = v_p = mat->rowvalues;
1856         for (i=0; i<nzB; i++) {
1857           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1858           else break;
1859         }
1860         imark = i;
1861         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1862         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1863       }
1864       if (idx) {
1865         *idx = idx_p = mat->rowindices;
1866         if (imark > -1) {
1867           for (i=0; i<imark; i++) {
1868             idx_p[i] = cmap[cworkB[i]];
1869           }
1870         } else {
1871           for (i=0; i<nzB; i++) {
1872             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1873             else break;
1874           }
1875           imark = i;
1876         }
1877         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1878         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1879       }
1880     } else {
1881       if (idx) *idx = 0;
1882       if (v)   *v   = 0;
1883     }
1884   }
1885   *nz  = nztot;
1886   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1887   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
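
/*
   Rows handed back by the routine above are merged from the diagonal (A) and off-diagonal (B)
   blocks and sorted by global column.  The usual access pattern (only locally owned rows may be
   requested, and each MatGetRow() must be paired with MatRestoreRow() before the next one):

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols,row,rstart,rend;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[], both invalidated by the restore below ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/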
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1893 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1894 {
1895   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1896 
1897   PetscFunctionBegin;
1898   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1899   aij->getrowactive = PETSC_FALSE;
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatNorm_MPIAIJ"
1905 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1906 {
1907   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1908   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1909   PetscErrorCode ierr;
1910   PetscInt       i,j,cstart = mat->cmap->rstart;
1911   PetscReal      sum = 0.0;
1912   MatScalar      *v;
1913 
1914   PetscFunctionBegin;
1915   if (aij->size == 1) {
1916     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1917   } else {
1918     if (type == NORM_FROBENIUS) {
1919       v = amat->a;
1920       for (i=0; i<amat->nz; i++) {
1921         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1922       }
1923       v = bmat->a;
1924       for (i=0; i<bmat->nz; i++) {
1925         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1926       }
1927       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1928       *norm = PetscSqrtReal(*norm);
1929       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1930     } else if (type == NORM_1) { /* max column norm */
1931       PetscReal *tmp,*tmp2;
1932       PetscInt  *jj,*garray = aij->garray;
1933       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1934       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1935       *norm = 0.0;
1936       v     = amat->a; jj = amat->j;
1937       for (j=0; j<amat->nz; j++) {
1938         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1939       }
1940       v = bmat->a; jj = bmat->j;
1941       for (j=0; j<bmat->nz; j++) {
1942         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1943       }
1944       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1945       for (j=0; j<mat->cmap->N; j++) {
1946         if (tmp2[j] > *norm) *norm = tmp2[j];
1947       }
1948       ierr = PetscFree(tmp);CHKERRQ(ierr);
1949       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1950       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1951     } else if (type == NORM_INFINITY) { /* max row norm */
1952       PetscReal ntemp = 0.0;
1953       for (j=0; j<aij->A->rmap->n; j++) {
1954         v   = amat->a + amat->i[j];
1955         sum = 0.0;
1956         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1957           sum += PetscAbsScalar(*v); v++;
1958         }
1959         v = bmat->a + bmat->i[j];
1960         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1961           sum += PetscAbsScalar(*v); v++;
1962         }
1963         if (sum > ntemp) ntemp = sum;
1964       }
1965       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1966       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1967     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1968   }
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatTranspose_MPIAIJ"
1974 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1975 {
1976   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1977   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1978   PetscErrorCode ierr;
1979   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1980   PetscInt       cstart = A->cmap->rstart,ncol;
1981   Mat            B;
1982   MatScalar      *array;
1983 
1984   PetscFunctionBegin;
1985   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1986 
1987   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1988   ai = Aloc->i; aj = Aloc->j;
1989   bi = Bloc->i; bj = Bloc->j;
1990   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1991     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1992     PetscSFNode          *oloc;
1993     PETSC_UNUSED PetscSF sf;
1994 
1995     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1996     /* compute d_nnz for preallocation */
1997     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1998     for (i=0; i<ai[ma]; i++) {
1999       d_nnz[aj[i]]++;
2000       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2001     }
2002     /* compute local off-diagonal contributions */
2003     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2004     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2005     /* map those to global */
2006     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2007     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2008     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2009     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2010     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2011     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2012     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2013 
2014     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2015     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2016     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2017     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2018     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2019     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2020   } else {
2021     B    = *matout;
2022     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2023     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2024   }
2025 
2026   /* copy over the A part */
2027   array = Aloc->a;
2028   row   = A->rmap->rstart;
2029   for (i=0; i<ma; i++) {
2030     ncol = ai[i+1]-ai[i];
2031     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2032     row++;
2033     array += ncol; aj += ncol;
2034   }
2035   aj = Aloc->j;
2036   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2037 
2038   /* copy over the B part */
2039   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2040   array = Bloc->a;
2041   row   = A->rmap->rstart;
2042   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2043   cols_tmp = cols;
2044   for (i=0; i<mb; i++) {
2045     ncol = bi[i+1]-bi[i];
2046     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2047     row++;
2048     array += ncol; cols_tmp += ncol;
2049   }
2050   ierr = PetscFree(cols);CHKERRQ(ierr);
2051 
2052   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2053   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2054   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2055     *matout = B;
2056   } else {
2057     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2058   }
2059   PetscFunctionReturn(0);
2060 }
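
/*
   Calling sketch for the transpose routine above (out-of-place form shown; in-place transposition,
   passing the matrix itself as the output, is only allowed for square matrices, as checked above):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/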
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2064 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2065 {
2066   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2067   Mat            a    = aij->A,b = aij->B;
2068   PetscErrorCode ierr;
2069   PetscInt       s1,s2,s3;
2070 
2071   PetscFunctionBegin;
2072   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2073   if (rr) {
2074     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2075     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2076     /* Overlap communication with computation. */
2077     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2078   }
2079   if (ll) {
2080     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2081     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2082     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2083   }
2084   /* scale the diagonal block */
2085   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2086 
2087   if (rr) {
2088     /* Do a scatter end and then right scale the off-diagonal block */
2089     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2090     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2091   }
2092   PetscFunctionReturn(0);
2093 }
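
/*
   The routine above computes mat <- diag(ll) * mat * diag(rr); either vector may be NULL to skip
   that side.  ll must match the local row layout of mat and rr the local column layout.  Sketch:

     ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);
*/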
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2097 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2098 {
2099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2100   PetscErrorCode ierr;
2101 
2102   PetscFunctionBegin;
2103   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatEqual_MPIAIJ"
2109 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2110 {
2111   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2112   Mat            a,b,c,d;
2113   PetscBool      flg;
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117   a = matA->A; b = matA->B;
2118   c = matB->A; d = matB->B;
2119 
2120   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2121   if (flg) {
2122     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2123   }
2124   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatCopy_MPIAIJ"
2130 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2131 {
2132   PetscErrorCode ierr;
2133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2134   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2135 
2136   PetscFunctionBegin;
2137   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2138   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2139     /* because of the column compression in the off-processor part of the matrix a->B,
2140        the number of columns in a->B and b->B may be different, hence we cannot call
2141        the MatCopy() directly on the two parts. If need be, we can provide a more
2142        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2143        then copying the submatrices */
2144     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2145   } else {
2146     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2147     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2148   }
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatSetUp_MPIAIJ"
2154 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2155 {
2156   PetscErrorCode ierr;
2157 
2158   PetscFunctionBegin;
2159   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 /*
2164    Computes the number of nonzeros per row needed for preallocation when X and Y
2165    have different nonzero structure.
2166 */
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2169 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2170 {
2171   PetscInt       i,j,k,nzx,nzy;
2172 
2173   PetscFunctionBegin;
2174   /* Set the number of nonzeros in the new matrix */
2175   for (i=0; i<m; i++) {
2176     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2177     nzx = xi[i+1] - xi[i];
2178     nzy = yi[i+1] - yi[i];
2179     nnz[i] = 0;
2180     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2181       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2182       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2183       nnz[i]++;
2184     }
2185     for (; k<nzy; k++) nnz[i]++;
2186   }
2187   PetscFunctionReturn(0);
2188 }
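
/*
   Worked example of the merge count above for a single row: if X has global columns {1,4,7} and
   Y has global columns {2,4,9}, the merged pattern is the union {1,2,4,7,9}, so nnz[i] = 5; the
   shared column 4 is counted only once because k is advanced past the duplicate.
*/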
2189 
2190 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2193 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2194 {
2195   PetscErrorCode ierr;
2196   PetscInt       m = Y->rmap->N;
2197   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2198   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2199 
2200   PetscFunctionBegin;
2201   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatAXPY_MPIAIJ"
2207 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2208 {
2209   PetscErrorCode ierr;
2210   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2211   PetscBLASInt   bnz,one=1;
2212   Mat_SeqAIJ     *x,*y;
2213 
2214   PetscFunctionBegin;
2215   if (str == SAME_NONZERO_PATTERN) {
2216     PetscScalar alpha = a;
2217     x    = (Mat_SeqAIJ*)xx->A->data;
2218     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2219     y    = (Mat_SeqAIJ*)yy->A->data;
2220     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2221     x    = (Mat_SeqAIJ*)xx->B->data;
2222     y    = (Mat_SeqAIJ*)yy->B->data;
2223     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2224     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2225     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2226   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2227     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2228   } else {
2229     Mat      B;
2230     PetscInt *nnz_d,*nnz_o;
2231     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2232     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2233     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2234     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2235     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2236     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2237     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2238     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2239     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2240     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2241     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2242     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2243     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2244     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2245   }
2246   PetscFunctionReturn(0);
2247 }
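
/*
   Calling sketch for the routine above, which computes Y <- a*X + Y.  SAME_NONZERO_PATTERN is the
   cheap path (two BLAS axpy calls on the stored values); DIFFERENT_NONZERO_PATTERN falls through to
   the preallocate-and-merge branch that rebuilds Y:

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/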
2248 
2249 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatConjugate_MPIAIJ"
2253 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2254 {
2255 #if defined(PETSC_USE_COMPLEX)
2256   PetscErrorCode ierr;
2257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2258 
2259   PetscFunctionBegin;
2260   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2261   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2262 #else
2263   PetscFunctionBegin;
2264 #endif
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatRealPart_MPIAIJ"
2270 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2271 {
2272   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2273   PetscErrorCode ierr;
2274 
2275   PetscFunctionBegin;
2276   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2277   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2283 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2284 {
2285   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2286   PetscErrorCode ierr;
2287 
2288   PetscFunctionBegin;
2289   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2290   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2296 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2297 {
2298   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2299   PetscErrorCode ierr;
2300   PetscInt       i,*idxb = 0;
2301   PetscScalar    *va,*vb;
2302   Vec            vtmp;
2303 
2304   PetscFunctionBegin;
2305   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2306   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2307   if (idx) {
2308     for (i=0; i<A->rmap->n; i++) {
2309       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2310     }
2311   }
2312 
2313   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2314   if (idx) {
2315     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2316   }
2317   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2318   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2319 
2320   for (i=0; i<A->rmap->n; i++) {
2321     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2322       va[i] = vb[i];
2323       if (idx) idx[i] = a->garray[idxb[i]];
2324     }
2325   }
2326 
2327   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2328   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2329   ierr = PetscFree(idxb);CHKERRQ(ierr);
2330   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #undef __FUNCT__
2335 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2336 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2337 {
2338   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2339   PetscErrorCode ierr;
2340   PetscInt       i,*idxb = 0;
2341   PetscScalar    *va,*vb;
2342   Vec            vtmp;
2343 
2344   PetscFunctionBegin;
2345   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2346   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2347   if (idx) {
2348     for (i=0; i<A->rmap->n; i++) {
2349       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2350     }
2351   }
2352 
2353   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2354   if (idx) {
2355     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2356   }
2357   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2358   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2359 
2360   for (i=0; i<A->rmap->n; i++) {
2361     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2362       va[i] = vb[i];
2363       if (idx) idx[i] = a->garray[idxb[i]];
2364     }
2365   }
2366 
2367   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2368   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2369   ierr = PetscFree(idxb);CHKERRQ(ierr);
2370   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2376 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2377 {
2378   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2379   PetscInt       n      = A->rmap->n;
2380   PetscInt       cstart = A->cmap->rstart;
2381   PetscInt       *cmap  = mat->garray;
2382   PetscInt       *diagIdx, *offdiagIdx;
2383   Vec            diagV, offdiagV;
2384   PetscScalar    *a, *diagA, *offdiagA;
2385   PetscInt       r;
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2390   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2391   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2392   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2393   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2394   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2395   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2396   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2397   for (r = 0; r < n; ++r) {
2398     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2399       a[r]   = diagA[r];
2400       idx[r] = cstart + diagIdx[r];
2401     } else {
2402       a[r]   = offdiagA[r];
2403       idx[r] = cmap[offdiagIdx[r]];
2404     }
2405   }
2406   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2407   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2408   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2409   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2410   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2411   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2417 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2418 {
2419   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2420   PetscInt       n      = A->rmap->n;
2421   PetscInt       cstart = A->cmap->rstart;
2422   PetscInt       *cmap  = mat->garray;
2423   PetscInt       *diagIdx, *offdiagIdx;
2424   Vec            diagV, offdiagV;
2425   PetscScalar    *a, *diagA, *offdiagA;
2426   PetscInt       r;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2432   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2433   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2434   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2436   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2437   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2438   for (r = 0; r < n; ++r) {
2439     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2440       a[r]   = diagA[r];
2441       idx[r] = cstart + diagIdx[r];
2442     } else {
2443       a[r]   = offdiagA[r];
2444       idx[r] = cmap[offdiagIdx[r]];
2445     }
2446   }
2447   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2450   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2451   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2452   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2458 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2459 {
2460   PetscErrorCode ierr;
2461   Mat            *dummy;
2462 
2463   PetscFunctionBegin;
2464   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2465   *newmat = *dummy;
2466   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2472 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2473 {
2474   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2479   A->errortype = a->A->errortype;
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2485 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2486 {
2487   PetscErrorCode ierr;
2488   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2489 
2490   PetscFunctionBegin;
2491   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2492   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2493   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2494   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2500 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2501 {
2502   PetscFunctionBegin;
2503   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2504   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 #undef __FUNCT__
2509 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2510 /*@
2511    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2512 
2513    Collective on Mat
2514 
2515    Input Parameters:
2516 +    A - the matrix
2517 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2518 
2519    Level: advanced
2520 
2521 @*/
2522 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2523 {
2524   PetscErrorCode       ierr;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2528   PetscFunctionReturn(0);
2529 }
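
/*
   Usage sketch for the routine above; the same switch is available from the options database as
   -mat_increase_overlap_scalable (handled in MatSetFromOptions_MPIAIJ() below):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
*/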
2530 
2531 #undef __FUNCT__
2532 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2533 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2534 {
2535   PetscErrorCode       ierr;
2536   PetscBool            sc = PETSC_FALSE,flg;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2540   ierr = PetscObjectOptionsBegin((PetscObject)A);
2541     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2542     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2543     if (flg) {
2544       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2545     }
2546   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 #undef __FUNCT__
2551 #define __FUNCT__ "MatShift_MPIAIJ"
2552 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2553 {
2554   PetscErrorCode ierr;
2555   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2556   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2557 
2558   PetscFunctionBegin;
2559   if (!Y->preallocated) {
2560     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2561   } else if (!aij->nz) {
2562     PetscInt nonew = aij->nonew;
2563     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2564     aij->nonew = nonew;
2565   }
2566   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 #undef __FUNCT__
2571 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2572 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2573 {
2574   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2575   PetscErrorCode ierr;
2576 
2577   PetscFunctionBegin;
2578   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2579   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2580   if (d) {
2581     PetscInt rstart;
2582     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2583     *d += rstart;
2584 
2585   }
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 
2590 /* -------------------------------------------------------------------*/
2591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2592                                        MatGetRow_MPIAIJ,
2593                                        MatRestoreRow_MPIAIJ,
2594                                        MatMult_MPIAIJ,
2595                                 /* 4*/ MatMultAdd_MPIAIJ,
2596                                        MatMultTranspose_MPIAIJ,
2597                                        MatMultTransposeAdd_MPIAIJ,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                 /*10*/ 0,
2602                                        0,
2603                                        0,
2604                                        MatSOR_MPIAIJ,
2605                                        MatTranspose_MPIAIJ,
2606                                 /*15*/ MatGetInfo_MPIAIJ,
2607                                        MatEqual_MPIAIJ,
2608                                        MatGetDiagonal_MPIAIJ,
2609                                        MatDiagonalScale_MPIAIJ,
2610                                        MatNorm_MPIAIJ,
2611                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2612                                        MatAssemblyEnd_MPIAIJ,
2613                                        MatSetOption_MPIAIJ,
2614                                        MatZeroEntries_MPIAIJ,
2615                                 /*24*/ MatZeroRows_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                 /*29*/ MatSetUp_MPIAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*34*/ MatDuplicate_MPIAIJ,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                 /*39*/ MatAXPY_MPIAIJ,
2631                                        MatGetSubMatrices_MPIAIJ,
2632                                        MatIncreaseOverlap_MPIAIJ,
2633                                        MatGetValues_MPIAIJ,
2634                                        MatCopy_MPIAIJ,
2635                                 /*44*/ MatGetRowMax_MPIAIJ,
2636                                        MatScale_MPIAIJ,
2637                                        MatShift_MPIAIJ,
2638                                        MatDiagonalSet_MPIAIJ,
2639                                        MatZeroRowsColumns_MPIAIJ,
2640                                 /*49*/ MatSetRandom_MPIAIJ,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2646                                        0,
2647                                        MatSetUnfactored_MPIAIJ,
2648                                        MatPermute_MPIAIJ,
2649                                        0,
2650                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2651                                        MatDestroy_MPIAIJ,
2652                                        MatView_MPIAIJ,
2653                                        0,
2654                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2655                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2656                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2661                                        MatGetRowMinAbs_MPIAIJ,
2662                                        0,
2663                                        MatSetColoring_MPIAIJ,
2664                                        0,
2665                                        MatSetValuesAdifor_MPIAIJ,
2666                                 /*75*/ MatFDColoringApply_AIJ,
2667                                        MatSetFromOptions_MPIAIJ,
2668                                        0,
2669                                        0,
2670                                        MatFindZeroDiagonals_MPIAIJ,
2671                                 /*80*/ 0,
2672                                        0,
2673                                        0,
2674                                 /*83*/ MatLoad_MPIAIJ,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2681                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2682                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2683                                        MatPtAP_MPIAIJ_MPIAIJ,
2684                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2685                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                 /*99*/ 0,
2691                                        0,
2692                                        0,
2693                                        MatConjugate_MPIAIJ,
2694                                        0,
2695                                 /*104*/MatSetValuesRow_MPIAIJ,
2696                                        MatRealPart_MPIAIJ,
2697                                        MatImaginaryPart_MPIAIJ,
2698                                        0,
2699                                        0,
2700                                 /*109*/0,
2701                                        0,
2702                                        MatGetRowMin_MPIAIJ,
2703                                        0,
2704                                        MatMissingDiagonal_MPIAIJ,
2705                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2706                                        0,
2707                                        MatGetGhosts_MPIAIJ,
2708                                        0,
2709                                        0,
2710                                 /*119*/0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        MatGetMultiProcBlock_MPIAIJ,
2715                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2716                                        MatGetColumnNorms_MPIAIJ,
2717                                        MatInvertBlockDiagonal_MPIAIJ,
2718                                        0,
2719                                        MatGetSubMatricesMPI_MPIAIJ,
2720                                 /*129*/0,
2721                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2722                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2723                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2724                                        0,
2725                                 /*134*/0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                 /*139*/0,
2731                                        0,
2732                                        0,
2733                                        MatFDColoringSetUp_MPIXAIJ,
2734                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2735                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2736 };
2737 
2738 /* ----------------------------------------------------------------------------------------*/
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2742 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2743 {
2744   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2749   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2755 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2756 {
2757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2758   PetscErrorCode ierr;
2759 
2760   PetscFunctionBegin;
2761   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2762   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2763   PetscFunctionReturn(0);
2764 }
2765 
2766 #undef __FUNCT__
2767 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2768 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2769 {
2770   Mat_MPIAIJ     *b;
2771   PetscErrorCode ierr;
2772 
2773   PetscFunctionBegin;
2774   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2775   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2776   b = (Mat_MPIAIJ*)B->data;
2777 
2778   if (!B->preallocated) {
2779     /* Explicitly create 2 MATSEQAIJ matrices. */
2780     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2781     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2782     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2783     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2784     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2785     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2786     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2787     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2788     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2789     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2790   }
2791 
2792   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2793   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2794   B->preallocated = PETSC_TRUE;
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2801 {
2802   Mat            mat;
2803   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   *newmat = 0;
2808   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2809   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2810   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2811   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2812   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2813   a       = (Mat_MPIAIJ*)mat->data;
2814 
2815   mat->factortype   = matin->factortype;
2816   mat->assembled    = PETSC_TRUE;
2817   mat->insertmode   = NOT_SET_VALUES;
2818   mat->preallocated = PETSC_TRUE;
2819 
2820   a->size         = oldmat->size;
2821   a->rank         = oldmat->rank;
2822   a->donotstash   = oldmat->donotstash;
2823   a->roworiented  = oldmat->roworiented;
2824   a->rowindices   = 0;
2825   a->rowvalues    = 0;
2826   a->getrowactive = PETSC_FALSE;
2827 
2828   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2829   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2830 
2831   if (oldmat->colmap) {
2832 #if defined(PETSC_USE_CTABLE)
2833     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2834 #else
2835     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2836     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2837     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2838 #endif
2839   } else a->colmap = 0;
2840   if (oldmat->garray) {
2841     PetscInt len;
2842     len  = oldmat->B->cmap->n;
2843     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2845     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2846   } else a->garray = 0;
2847 
2848   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2849   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2850   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2851   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2852   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2853   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2854   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2855   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2856   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2857   *newmat = mat;
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 
2862 
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatLoad_MPIAIJ"
2865 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2866 {
2867   PetscScalar    *vals,*svals;
2868   MPI_Comm       comm;
2869   PetscErrorCode ierr;
2870   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2871   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2872   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2873   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2874   PetscInt       cend,cstart,n,*rowners;
2875   int            fd;
2876   PetscInt       bs = newMat->rmap->bs;
2877 
2878   PetscFunctionBegin;
2879   /* force binary viewer to load .info file if it has not yet done so */
2880   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2881   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2882   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2883   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2884   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2885   if (!rank) {
2886     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2887     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2888     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2889   }
2890 
2891   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2892   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2893   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2894   if (bs < 0) bs = 1;
2895 
2896   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2897   M    = header[1]; N = header[2];
2898 
2899   /* If global sizes are set, check if they are consistent with that given in the file */
2900   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2901   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2902 
2903   /* determine ownership of all (block) rows */
2904   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2905   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2906   else m = newMat->rmap->n; /* Set by user */
2907 
2908   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2909   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2910 
2911   /* First process needs enough room for the process with the most rows */
2912   if (!rank) {
2913     mmax = rowners[1];
2914     for (i=2; i<=size; i++) {
2915       mmax = PetscMax(mmax, rowners[i]);
2916     }
2917   } else mmax = -1;             /* unused, but compilers complain */
2918 
2919   rowners[0] = 0;
2920   for (i=2; i<=size; i++) {
2921     rowners[i] += rowners[i-1];
2922   }
2923   rstart = rowners[rank];
2924   rend   = rowners[rank+1];
2925 
2926   /* distribute row lengths to all processors */
2927   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2928   if (!rank) {
2929     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2930     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2931     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2932     for (j=0; j<m; j++) {
2933       procsnz[0] += ourlens[j];
2934     }
2935     for (i=1; i<size; i++) {
2936       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2937       /* calculate the number of nonzeros on each processor */
2938       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2939         procsnz[i] += rowlengths[j];
2940       }
2941       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2942     }
2943     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2944   } else {
2945     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2946   }
2947 
2948   if (!rank) {
2949     /* determine max buffer needed and allocate it */
2950     maxnz = 0;
2951     for (i=0; i<size; i++) {
2952       maxnz = PetscMax(maxnz,procsnz[i]);
2953     }
2954     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2955 
2956     /* read in my part of the matrix column indices  */
2957     nz   = procsnz[0];
2958     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2959     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2960 
2961     /* read in everyone else's column indices and ship them off */
2962     for (i=1; i<size; i++) {
2963       nz   = procsnz[i];
2964       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2965       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2966     }
2967     ierr = PetscFree(cols);CHKERRQ(ierr);
2968   } else {
2969     /* determine buffer space needed for message */
2970     nz = 0;
2971     for (i=0; i<m; i++) {
2972       nz += ourlens[i];
2973     }
2974     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2975 
2976     /* receive message of column indices*/
2977     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2978   }
2979 
2980   /* determine column ownership if matrix is not square */
2981   if (N != M) {
2982     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2983     else n = newMat->cmap->n;
2984     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2985     cstart = cend - n;
2986   } else {
2987     cstart = rstart;
2988     cend   = rend;
2989     n      = cend - cstart;
2990   }
2991 
2992   /* loop over local rows, determining number of off diagonal entries */
2993   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2994   jj   = 0;
2995   for (i=0; i<m; i++) {
2996     for (j=0; j<ourlens[i]; j++) {
2997       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2998       jj++;
2999     }
3000   }
3001 
3002   for (i=0; i<m; i++) {
3003     ourlens[i] -= offlens[i];
3004   }
3005   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3006 
3007   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3008 
3009   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3010 
3011   for (i=0; i<m; i++) {
3012     ourlens[i] += offlens[i];
3013   }
3014 
3015   if (!rank) {
3016     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3017 
3018     /* read in my part of the matrix numerical values  */
3019     nz   = procsnz[0];
3020     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3021 
3022     /* insert into matrix */
3023     jj      = rstart;
3024     smycols = mycols;
3025     svals   = vals;
3026     for (i=0; i<m; i++) {
3027       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3028       smycols += ourlens[i];
3029       svals   += ourlens[i];
3030       jj++;
3031     }
3032 
3033     /* read in other processors and ship out */
3034     for (i=1; i<size; i++) {
3035       nz   = procsnz[i];
3036       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3037       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3038     }
3039     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3040   } else {
3041     /* receive numeric values */
3042     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3043 
3044     /* receive message of values*/
3045     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3046 
3047     /* insert into matrix */
3048     jj      = rstart;
3049     smycols = mycols;
3050     svals   = vals;
3051     for (i=0; i<m; i++) {
3052       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3053       smycols += ourlens[i];
3054       svals   += ourlens[i];
3055       jj++;
3056     }
3057   }
3058   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3059   ierr = PetscFree(vals);CHKERRQ(ierr);
3060   ierr = PetscFree(mycols);CHKERRQ(ierr);
3061   ierr = PetscFree(rowners);CHKERRQ(ierr);
3062   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3063   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3064   PetscFunctionReturn(0);
3065 }
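
/*
   A minimal sketch of the caller side of MatLoad() for this implementation (the file name
   "matrix.dat" and the communicator comm are assumptions of the sketch):

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);   // MatLoad() then dispatches to MatLoad_MPIAIJ()
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);         // reads header, row lengths, column indices and values as above
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/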
3066 
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3069 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3070 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3071 {
3072   PetscErrorCode ierr;
3073   IS             iscol_local;
3074   PetscInt       csize;
3075 
3076   PetscFunctionBegin;
3077   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3078   if (call == MAT_REUSE_MATRIX) {
3079     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3080     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3081   } else {
3082     /* check if we are grabbing all columns*/
3083     PetscBool    isstride;
3084     PetscMPIInt  lisstride = 0,gisstride;
3085     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3086     if (isstride) {
3087       PetscInt  start,len,mstart,mlen;
3088       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3089       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3090       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3091       if (mstart == start && mlen-mstart == len) lisstride = 1;
3092     }
3093     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3094     if (gisstride) {
3095       PetscInt N;
3096       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3097       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3098       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3099       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3100     } else {
3101       PetscInt cbs;
3102       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3103       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3104       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3105     }
3106   }
3107   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3108   if (call == MAT_INITIAL_MATRIX) {
3109     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3110     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3111   }
3112   PetscFunctionReturn(0);
3113 }
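
/*
   A usage sketch for the routine above (names are illustrative).  Requesting all columns of
   the locally owned rows hits the ISSTRIDE fast path that skips ISAllGather():

      IS       isrow,iscol;
      Mat      B;
      PetscInt rstart,rend,cstart,cend;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);   // reuses the composed "ISAllGather" IS
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/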
3114 
3115 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3118 /*
3119     Not ideal, since it makes two copies of the submatrix: first a sequential SeqAIJ
3120   matrix on each process, and then the final result formed by concatenating those
3121   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3122 
3123   Note: This requires a sequential iscol containing all of the column indices.
3124 */
3125 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3126 {
3127   PetscErrorCode ierr;
3128   PetscMPIInt    rank,size;
3129   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3130   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3131   PetscBool      allcolumns, colflag;
3132   Mat            M,Mreuse;
3133   MatScalar      *vwork,*aa;
3134   MPI_Comm       comm;
3135   Mat_SeqAIJ     *aij;
3136 
3137   PetscFunctionBegin;
3138   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3139   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3140   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3141 
3142   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3143   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3144   if (colflag && ncol == mat->cmap->N) {
3145     allcolumns = PETSC_TRUE;
3146     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3147   } else {
3148     allcolumns = PETSC_FALSE;
3149   }
3150   if (call ==  MAT_REUSE_MATRIX) {
3151     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3152     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3153     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3154   } else {
3155     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3156   }
3157 
3158   /*
3159       m - number of local rows
3160       n - number of columns (same on all processors)
3161       rstart - first row in new global matrix generated
3162   */
3163   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3164   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3165   if (call == MAT_INITIAL_MATRIX) {
3166     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3167     ii  = aij->i;
3168     jj  = aij->j;
3169 
3170     /*
3171         Determine the number of non-zeros in the diagonal and off-diagonal
3172         portions of the matrix in order to do correct preallocation
3173     */
3174 
3175     /* first get start and end of "diagonal" columns */
3176     if (csize == PETSC_DECIDE) {
3177       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3178       if (mglobal == n) { /* square matrix */
3179         nlocal = m;
3180       } else {
3181         nlocal = n/size + ((n % size) > rank);
3182       }
3183     } else {
3184       nlocal = csize;
3185     }
3186     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3187     rstart = rend - nlocal;
3188     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3189 
3190     /* next, compute all the lengths */
3191     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3192     olens = dlens + m;
3193     for (i=0; i<m; i++) {
3194       jend = ii[i+1] - ii[i];
3195       olen = 0;
3196       dlen = 0;
3197       for (j=0; j<jend; j++) {
3198         if (*jj < rstart || *jj >= rend) olen++;
3199         else dlen++;
3200         jj++;
3201       }
3202       olens[i] = olen;
3203       dlens[i] = dlen;
3204     }
3205     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3206     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3207     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3208     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3209     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3210     ierr = PetscFree(dlens);CHKERRQ(ierr);
3211   } else {
3212     PetscInt ml,nl;
3213 
3214     M    = *newmat;
3215     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3216     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3217     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3218     /*
3219          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3220        rather than the slower MatSetValues().
3221     */
3222     M->was_assembled = PETSC_TRUE;
3223     M->assembled     = PETSC_FALSE;
3224   }
3225   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3226   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3227   ii   = aij->i;
3228   jj   = aij->j;
3229   aa   = aij->a;
3230   for (i=0; i<m; i++) {
3231     row   = rstart + i;
3232     nz    = ii[i+1] - ii[i];
3233     cwork = jj;     jj += nz;
3234     vwork = aa;     aa += nz;
3235     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3236   }
3237 
3238   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3239   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3240   *newmat = M;
3241 
3242   /* save submatrix used in processor for next request */
3243   if (call ==  MAT_INITIAL_MATRIX) {
3244     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3245     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3246   }
3247   PetscFunctionReturn(0);
3248 }
3249 
3250 #undef __FUNCT__
3251 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3252 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3253 {
3254   PetscInt       m,cstart, cend,j,nnz,i,d;
3255   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3256   const PetscInt *JJ;
3257   PetscScalar    *values;
3258   PetscErrorCode ierr;
3259 
3260   PetscFunctionBegin;
3261   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3262 
3263   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3264   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3265   m      = B->rmap->n;
3266   cstart = B->cmap->rstart;
3267   cend   = B->cmap->rend;
3268   rstart = B->rmap->rstart;
3269 
3270   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3271 
3272 #if defined(PETSC_USE_DEBUG)
3273   for (i=0; i<m; i++) {
3274     nnz = Ii[i+1]- Ii[i];
3275     JJ  = J + Ii[i];
3276     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3277     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3278     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3279   }
3280 #endif
3281 
3282   for (i=0; i<m; i++) {
3283     nnz     = Ii[i+1]- Ii[i];
3284     JJ      = J + Ii[i];
3285     nnz_max = PetscMax(nnz_max,nnz);
3286     d       = 0;
3287     for (j=0; j<nnz; j++) {
3288       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3289     }
3290     d_nnz[i] = d;
3291     o_nnz[i] = nnz - d;
3292   }
3293   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3294   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3295 
3296   if (v) values = (PetscScalar*)v;
3297   else {
3298     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3299   }
3300 
3301   for (i=0; i<m; i++) {
3302     ii   = i + rstart;
3303     nnz  = Ii[i+1]- Ii[i];
3304     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3305   }
3306   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3307   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3308 
3309   if (!v) {
3310     ierr = PetscFree(values);CHKERRQ(ierr);
3311   }
3312   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3318 /*@
3319    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3320    (the default parallel PETSc format).
3321 
3322    Collective on MPI_Comm
3323 
3324    Input Parameters:
3325 +  B - the matrix
3326 .  i - the indices into j for the start of each local row (starts with zero)
3327 .  j - the column indices for each local row (starts with zero)
3328 -  v - optional values in the matrix
3329 
3330    Level: developer
3331 
3332    Notes:
3333        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3334      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3335      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3336 
3337        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3338 
3339        The format used for the sparse matrix input is equivalent to a
3340     row-major ordering, i.e. for the following matrix, the input data expected is
3341     as shown:
3342 
3343 $        1 0 0
3344 $        2 0 3     P0
3345 $       -------
3346 $        4 5 6     P1
3347 $
3348 $     Process0 [P0]: rows_owned=[0,1]
3349 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3350 $        j =  {0,0,2}  [size = 3]
3351 $        v =  {1,2,3}  [size = 3]
3352 $
3353 $     Process1 [P1]: rows_owned=[2]
3354 $        i =  {0,3}    [size = nrow+1  = 1+1]
3355 $        j =  {0,1,2}  [size = 3]
3356 $        v =  {4,5,6}  [size = 3]
3357 
3358 .keywords: matrix, aij, compressed row, sparse, parallel
3359 
3360 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3361           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3362 @*/
3363 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3364 {
3365   PetscErrorCode ierr;
3366 
3367   PetscFunctionBegin;
3368   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3369   PetscFunctionReturn(0);
3370 }
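
/*
   A sketch of how rank 0 of the two-process example in the manual page above would call this
   routine; the surrounding MatCreate()/MatSetSizes() calls are assumptions of the sketch, and
   the call is collective (rank 1 would pass i = {0,3}, j = {0,1,2}, v = {4,5,6} with 1 local row):

      Mat               B;
      const PetscInt    i[] = {0,1,3};          // 2 local rows on P0
      const PetscInt    j[] = {0,0,2};
      const PetscScalar v[] = {1,2,3};
      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);   // preallocates, inserts and assembles
*/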
3371 
3372 #undef __FUNCT__
3373 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3374 /*@C
3375    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3376    (the default parallel PETSc format).  For good matrix assembly performance
3377    the user should preallocate the matrix storage by setting the parameters
3378    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3379    performance can be increased by more than a factor of 50.
3380 
3381    Collective on MPI_Comm
3382 
3383    Input Parameters:
3384 +  B - the matrix
3385 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3386            (same value is used for all local rows)
3387 .  d_nnz - array containing the number of nonzeros in the various rows of the
3388            DIAGONAL portion of the local submatrix (possibly different for each row)
3389            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3390            The size of this array is equal to the number of local rows, i.e 'm'.
3391            For matrices that will be factored, you must leave room for (and set)
3392            the diagonal entry even if it is zero.
3393 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3394            submatrix (same value is used for all local rows).
3395 -  o_nnz - array containing the number of nonzeros in the various rows of the
3396            OFF-DIAGONAL portion of the local submatrix (possibly different for
3397            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3398            structure. The size of this array is equal to the number
3399            of local rows, i.e 'm'.
3400 
3401    If the *_nnz parameter is given then the *_nz parameter is ignored
3402 
3403    The AIJ format (also called the Yale sparse matrix format or
3404    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3405    storage.  The stored row and column indices begin with zero.
3406    See Users-Manual: ch_mat for details.
3407 
3408    The parallel matrix is partitioned such that the first m0 rows belong to
3409    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3410    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3411 
3412    The DIAGONAL portion of the local submatrix of a processor can be defined
3413    as the submatrix which is obtained by extracting the part corresponding to
3414    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3415    first row that belongs to the processor, r2 is the last row belonging to
3416    this processor, and c1-c2 is the range of indices of the local part of a
3417    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3418    common case of a square matrix, the row and column ranges are the same and
3419    the DIAGONAL part is also square. The remaining portion of the local
3420    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3421 
3422    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3423 
3424    You can call MatGetInfo() to get information on how effective the preallocation was;
3425    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded;
3426    You can also run with the option -info and look for messages with the string
3427    malloc in them to see if additional memory allocation was needed.
3428 
3429    Example usage:
3430 
3431    Consider the following 8x8 matrix with 34 non-zero values, that is
3432    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3433    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3434    as follows:
3435 
3436 .vb
3437             1  2  0  |  0  3  0  |  0  4
3438     Proc0   0  5  6  |  7  0  0  |  8  0
3439             9  0 10  | 11  0  0  | 12  0
3440     -------------------------------------
3441            13  0 14  | 15 16 17  |  0  0
3442     Proc1   0 18  0  | 19 20 21  |  0  0
3443             0  0  0  | 22 23  0  | 24  0
3444     -------------------------------------
3445     Proc2  25 26 27  |  0  0 28  | 29  0
3446            30  0  0  | 31 32 33  |  0 34
3447 .ve
3448 
3449    This can be represented as a collection of submatrices as:
3450 
3451 .vb
3452       A B C
3453       D E F
3454       G H I
3455 .ve
3456 
3457    Where the submatrices A,B,C are owned by proc0, D,E,F are
3458    owned by proc1, G,H,I are owned by proc2.
3459 
3460    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3461    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3462    The 'M','N' parameters are 8,8, and have the same values on all procs.
3463 
3464    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3465    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3466    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3467    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3468    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3469    matrix, and [DF] as another SeqAIJ matrix.
3470 
3471    When d_nz, o_nz parameters are specified, d_nz storage elements are
3472    allocated for every row of the local diagonal submatrix, and o_nz
3473    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3474    One way to choose d_nz and o_nz is to use the max number of nonzeros per local
3475    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3476    In this case, the values of d_nz,o_nz are:
3477 .vb
3478      proc0 : dnz = 2, o_nz = 2
3479      proc1 : dnz = 3, o_nz = 2
3480      proc2 : dnz = 1, o_nz = 4
3481 .ve
3482    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3483    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3484    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3485    34 values.
3486 
3487    When d_nnz, o_nnz parameters are specified, the storage is specified
3488    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3489    In the above case the values for d_nnz,o_nnz are:
3490 .vb
3491      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3492      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3493      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3494 .ve
3495    Here the space allocated is the sum of all the above values, i.e. 34, and
3496    hence pre-allocation is perfect.
3497 
3498    Level: intermediate
3499 
3500 .keywords: matrix, aij, compressed row, sparse, parallel
3501 
3502 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3503           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3504 @*/
3505 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3506 {
3507   PetscErrorCode ierr;
3508 
3509   PetscFunctionBegin;
3510   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3511   PetscValidType(B,1);
3512   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3513   PetscFunctionReturn(0);
3514 }
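
/*
   A sketch of preallocating the 8x8 example above on proc0 with per-row counts (the creation
   calls around it are assumptions of the sketch; proc1 and proc2 would pass their own
   d_nnz/o_nnz arrays from the table above):

      Mat            A;
      const PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};    // proc0 values from the table above
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);            // proc0 owns 3 rows and 3 "diagonal" columns
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() follow as usual
*/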
3515 
3516 #undef __FUNCT__
3517 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3518 /*@
3519      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3520          CSR format the local rows.
3521 
3522    Collective on MPI_Comm
3523 
3524    Input Parameters:
3525 +  comm - MPI communicator
3526 .  m - number of local rows (Cannot be PETSC_DECIDE)
3527 .  n - This value should be the same as the local size used in creating the
3528        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3529        calculated if N is given). For square matrices n is almost always m.
3530 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3531 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3532 .   i - row indices
3533 .   j - column indices
3534 -   a - matrix values
3535 
3536    Output Parameter:
3537 .   mat - the matrix
3538 
3539    Level: intermediate
3540 
3541    Notes:
3542        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3543      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3544      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3545 
3546        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3547 
3548        The format used for the sparse matrix input is equivalent to a
3549     row-major ordering, i.e. for the following matrix, the input data expected is
3550     as shown:
3551 
3552 $        1 0 0
3553 $        2 0 3     P0
3554 $       -------
3555 $        4 5 6     P1
3556 $
3557 $     Process0 [P0]: rows_owned=[0,1]
3558 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3559 $        j =  {0,0,2}  [size = 3]
3560 $        v =  {1,2,3}  [size = 3]
3561 $
3562 $     Process1 [P1]: rows_owned=[2]
3563 $        i =  {0,3}    [size = nrow+1  = 1+1]
3564 $        j =  {0,1,2}  [size = 3]
3565 $        v =  {4,5,6}  [size = 3]
3566 
3567 .keywords: matrix, aij, compressed row, sparse, parallel
3568 
3569 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3570           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3571 @*/
3572 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3573 {
3574   PetscErrorCode ierr;
3575 
3576   PetscFunctionBegin;
3577   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3578   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3579   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3580   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3581   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3582   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3583   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3584   PetscFunctionReturn(0);
3585 }
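
/*
   A sketch using the P0 data from the manual page above; each rank calls collectively with only
   the CSR arrays of its own rows (the 3x3 sizes match that example, other names are illustrative):

      Mat               A;
      const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};        // rank 0 owns rows 0 and 1
      const PetscScalar a0[] = {1,2,3};
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,a0,&A);CHKERRQ(ierr);
*/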
3586 
3587 #undef __FUNCT__
3588 #define __FUNCT__ "MatCreateAIJ"
3589 /*@C
3590    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3591    (the default parallel PETSc format).  For good matrix assembly performance
3592    the user should preallocate the matrix storage by setting the parameters
3593    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3594    performance can be increased by more than a factor of 50.
3595 
3596    Collective on MPI_Comm
3597 
3598    Input Parameters:
3599 +  comm - MPI communicator
3600 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3601            This value should be the same as the local size used in creating the
3602            y vector for the matrix-vector product y = Ax.
3603 .  n - This value should be the same as the local size used in creating the
3604        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3605        calculated if N is given). For square matrices n is almost always m.
3606 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3607 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3608 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3609            (same value is used for all local rows)
3610 .  d_nnz - array containing the number of nonzeros in the various rows of the
3611            DIAGONAL portion of the local submatrix (possibly different for each row)
3612            or NULL, if d_nz is used to specify the nonzero structure.
3613            The size of this array is equal to the number of local rows, i.e 'm'.
3614 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3615            submatrix (same value is used for all local rows).
3616 -  o_nnz - array containing the number of nonzeros in the various rows of the
3617            OFF-DIAGONAL portion of the local submatrix (possibly different for
3618            each row) or NULL, if o_nz is used to specify the nonzero
3619            structure. The size of this array is equal to the number
3620            of local rows, i.e 'm'.
3621 
3622    Output Parameter:
3623 .  A - the matrix
3624 
3625    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3626    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3627    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3628 
3629    Notes:
3630    If the *_nnz parameter is given then the *_nz parameter is ignored
3631 
3632    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3633    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3634    storage requirements for this matrix.
3635 
3636    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3637    processor then it must be used on all processors that share the object for
3638    that argument.
3639 
3640    The user MUST specify either the local or global matrix dimensions
3641    (possibly both).
3642 
3643    The parallel matrix is partitioned across processors such that the
3644    first m0 rows belong to process 0, the next m1 rows belong to
3645    process 1, the next m2 rows belong to process 2 etc.. where
3646    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3647    values corresponding to an [m x N] submatrix.
3648 
3649    The columns are logically partitioned with the n0 columns belonging
3650    to 0th partition, the next n1 columns belonging to the next
3651    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3652 
3653    The DIAGONAL portion of the local submatrix on any given processor
3654    is the submatrix corresponding to the rows and columns m,n
3655    corresponding to the given processor, i.e. the diagonal matrix on
3656    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3657    etc. The remaining portion of the local submatrix [m x (N-n)]
3658    constitute the OFF-DIAGONAL portion. The example below better
3659    illustrates this concept.
3660 
3661    For a square global matrix we define each processor's diagonal portion
3662    to be its local rows and the corresponding columns (a square submatrix);
3663    each processor's off-diagonal portion encompasses the remainder of the
3664    local matrix (a rectangular submatrix).
3665 
3666    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3667 
3668    When calling this routine with a single process communicator, a matrix of
3669    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3670    type of communicator, use the construction mechanism:
3671      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3672 
3673    By default, this format uses inodes (identical nodes) when possible.
3674    We search for consecutive rows with the same nonzero structure, thereby
3675    reusing matrix information to achieve increased efficiency.
3676 
3677    Options Database Keys:
3678 +  -mat_no_inode  - Do not use inodes
3679 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3680 -  -mat_aij_oneindex - Internally use indexing starting at 1
3681         rather than 0.  Note that when calling MatSetValues(),
3682         the user still MUST index entries starting at 0!
3683 
3684 
3685    Example usage:
3686 
3687    Consider the following 8x8 matrix with 34 non-zero values, that is
3688    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3689    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3690    as follows:
3691 
3692 .vb
3693             1  2  0  |  0  3  0  |  0  4
3694     Proc0   0  5  6  |  7  0  0  |  8  0
3695             9  0 10  | 11  0  0  | 12  0
3696     -------------------------------------
3697            13  0 14  | 15 16 17  |  0  0
3698     Proc1   0 18  0  | 19 20 21  |  0  0
3699             0  0  0  | 22 23  0  | 24  0
3700     -------------------------------------
3701     Proc2  25 26 27  |  0  0 28  | 29  0
3702            30  0  0  | 31 32 33  |  0 34
3703 .ve
3704 
3705    This can be represented as a collection of submatrices as:
3706 
3707 .vb
3708       A B C
3709       D E F
3710       G H I
3711 .ve
3712 
3713    Where the submatrices A,B,C are owned by proc0, D,E,F are
3714    owned by proc1, G,H,I are owned by proc2.
3715 
3716    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3717    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3718    The 'M','N' parameters are 8,8, and have the same values on all procs.
3719 
3720    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3721    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3722    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3723    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3724    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3725    matrix, and [DF] as another SeqAIJ matrix.
3726 
3727    When d_nz, o_nz parameters are specified, d_nz storage elements are
3728    allocated for every row of the local diagonal submatrix, and o_nz
3729    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3730    One way to choose d_nz and o_nz is to use the max number of nonzeros per local
3731    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3732    In this case, the values of d_nz,o_nz are:
3733 .vb
3734      proc0 : dnz = 2, o_nz = 2
3735      proc1 : dnz = 3, o_nz = 2
3736      proc2 : dnz = 1, o_nz = 4
3737 .ve
3738    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3739    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3740    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3741    34 values.
3742 
3743    When d_nnz, o_nnz parameters are specified, the storage is specified
3744    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3745    In the above case the values for d_nnz,o_nnz are:
3746 .vb
3747      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3748      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3749      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3750 .ve
3751    Here the space allocated is the sum of all the above values, i.e. 34, and
3752    hence pre-allocation is perfect.
3753 
3754    Level: intermediate
3755 
3756 .keywords: matrix, aij, compressed row, sparse, parallel
3757 
3758 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3759           MPIAIJ, MatCreateMPIAIJWithArrays()
3760 @*/
3761 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3762 {
3763   PetscErrorCode ierr;
3764   PetscMPIInt    size;
3765 
3766   PetscFunctionBegin;
3767   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3768   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3769   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3770   if (size > 1) {
3771     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3772     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3773   } else {
3774     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3775     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3776   }
3777   PetscFunctionReturn(0);
3778 }
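
/*
   A minimal sketch of the single-call creation path documented above; the global size 8 and the
   d_nz/o_nz guesses of 3 and 4 are simply the largest per-row counts from the example table, so
   no extra mallocs would occur during assembly (values are illustrative, not a recommendation):

      Mat A;
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,NULL,4,NULL,&A);CHKERRQ(ierr);
      // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd(), use the matrix, then
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/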
3779 
3780 #undef __FUNCT__
3781 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3782 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3783 {
3784   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3785   PetscBool      flg;
3786   PetscErrorCode ierr;
3787 
3788   PetscFunctionBegin;
3789   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3790   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input");
3791   if (Ad)     *Ad     = a->A;
3792   if (Ao)     *Ao     = a->B;
3793   if (colmap) *colmap = a->garray;
3794   PetscFunctionReturn(0);
3795 }
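
/*
   A usage sketch (illustrative names): peek at the two sequential blocks and the mapping from
   local columns of the off-diagonal block back to global column indices:

      Mat            Ad,Ao;
      const PetscInt *garray;
      PetscInt       nloc;
      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);   // Ad and Ao are borrowed references, do not destroy
      ierr = MatGetLocalSize(Ao,NULL,&nloc);CHKERRQ(ierr);          // number of local columns of Ao
      // garray[k] is the global column of local column k of Ao, for k = 0,...,nloc-1
*/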
3796 
3797 #undef __FUNCT__
3798 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3799 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3800 {
3801   PetscErrorCode ierr;
3802   PetscInt       i;
3803   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3804 
3805   PetscFunctionBegin;
3806   if (coloring->ctype == IS_COLORING_GLOBAL) {
3807     ISColoringValue *allcolors,*colors;
3808     ISColoring      ocoloring;
3809 
3810     /* set coloring for diagonal portion */
3811     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3812 
3813     /* set coloring for off-diagonal portion */
3814     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3815     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3816     for (i=0; i<a->B->cmap->n; i++) {
3817       colors[i] = allcolors[a->garray[i]];
3818     }
3819     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3820     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3821     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3822     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3823   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3824     ISColoringValue *colors;
3825     PetscInt        *larray;
3826     ISColoring      ocoloring;
3827 
3828     /* set coloring for diagonal portion */
3829     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3830     for (i=0; i<a->A->cmap->n; i++) {
3831       larray[i] = i + A->cmap->rstart;
3832     }
3833     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3834     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3835     for (i=0; i<a->A->cmap->n; i++) {
3836       colors[i] = coloring->colors[larray[i]];
3837     }
3838     ierr = PetscFree(larray);CHKERRQ(ierr);
3839     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3840     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3841     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3842 
3843     /* set coloring for off-diagonal portion */
3844     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3845     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3846     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3847     for (i=0; i<a->B->cmap->n; i++) {
3848       colors[i] = coloring->colors[larray[i]];
3849     }
3850     ierr = PetscFree(larray);CHKERRQ(ierr);
3851     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3852     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3853     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3854   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3855   PetscFunctionReturn(0);
3856 }
3857 
3858 #undef __FUNCT__
3859 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3860 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3861 {
3862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3863   PetscErrorCode ierr;
3864 
3865   PetscFunctionBegin;
3866   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3867   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3868   PetscFunctionReturn(0);
3869 }
3870 
3871 #undef __FUNCT__
3872 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3873 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3874 {
3875   PetscErrorCode ierr;
3876   PetscInt       m,N,i,rstart,nnz,Ii;
3877   PetscInt       *indx;
3878   PetscScalar    *values;
3879 
3880   PetscFunctionBegin;
3881   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3882   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3883     PetscInt       *dnz,*onz,sum,bs,cbs;
3884 
3885     if (n == PETSC_DECIDE) {
3886       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3887     }
3888     /* Check sum(n) = N */
3889     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3890     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3891 
3892     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3893     rstart -= m;
3894 
3895     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3896     for (i=0; i<m; i++) {
3897       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3898       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3899       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3900     }
3901 
3902     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3903     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3904     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3905     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3906     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3907     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3908     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3909   }
3910 
3911   /* numeric phase */
3912   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3913   for (i=0; i<m; i++) {
3914     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3915     Ii   = i + rstart;
3916     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3917     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3918   }
3919   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3920   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3921   PetscFunctionReturn(0);
3922 }
3923 
3924 #undef __FUNCT__
3925 #define __FUNCT__ "MatFileSplit"
3926 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3927 {
3928   PetscErrorCode    ierr;
3929   PetscMPIInt       rank;
3930   PetscInt          m,N,i,rstart,nnz;
3931   size_t            len;
3932   const PetscInt    *indx;
3933   PetscViewer       out;
3934   char              *name;
3935   Mat               B;
3936   const PetscScalar *values;
3937 
3938   PetscFunctionBegin;
3939   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3940   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3941   /* Should this be the type of the diagonal block of A? */
3942   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3943   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3944   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3945   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3946   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3947   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3948   for (i=0; i<m; i++) {
3949     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3950     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3951     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3952   }
3953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3955 
3956   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3957   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3958   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3959   sprintf(name,"%s.%d",outfile,rank);
3960   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3961   ierr = PetscFree(name);CHKERRQ(ierr);
3962   ierr = MatView(B,out);CHKERRQ(ierr);
3963   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3964   ierr = MatDestroy(&B);CHKERRQ(ierr);
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3971 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3972 {
3973   PetscErrorCode      ierr;
3974   Mat_Merge_SeqsToMPI *merge;
3975   PetscContainer      container;
3976 
3977   PetscFunctionBegin;
3978   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3979   if (container) {
3980     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3981     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3982     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3983     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3984     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3985     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3986     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3987     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3988     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3989     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3993     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3994     ierr = PetscFree(merge);CHKERRQ(ierr);
3995     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3996   }
3997   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3998   PetscFunctionReturn(0);
3999 }
4000 
4001 #include <../src/mat/utils/freespace.h>
4002 #include <petscbt.h>
4003 
4004 #undef __FUNCT__
4005 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4006 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4007 {
4008   PetscErrorCode      ierr;
4009   MPI_Comm            comm;
4010   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4011   PetscMPIInt         size,rank,taga,*len_s;
4012   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4013   PetscInt            proc,m;
4014   PetscInt            **buf_ri,**buf_rj;
4015   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4016   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4017   MPI_Request         *s_waits,*r_waits;
4018   MPI_Status          *status;
4019   MatScalar           *aa=a->a;
4020   MatScalar           **abuf_r,*ba_i;
4021   Mat_Merge_SeqsToMPI *merge;
4022   PetscContainer      container;
4023 
4024   PetscFunctionBegin;
4025   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4026   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4027 
4028   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4029   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4030 
4031   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4032   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4033 
4034   bi     = merge->bi;
4035   bj     = merge->bj;
4036   buf_ri = merge->buf_ri;
4037   buf_rj = merge->buf_rj;
4038 
4039   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4040   owners = merge->rowmap->range;
4041   len_s  = merge->len_s;
4042 
4043   /* send and recv matrix values */
4044   /*-----------------------------*/
4045   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4046   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4047 
4048   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4049   for (proc=0,k=0; proc<size; proc++) {
4050     if (!len_s[proc]) continue;
4051     i    = owners[proc];
4052     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4053     k++;
4054   }
4055 
4056   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4057   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4058   ierr = PetscFree(status);CHKERRQ(ierr);
4059 
4060   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4061   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4062 
4063   /* insert mat values of mpimat */
4064   /*----------------------------*/
4065   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4066   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4067 
4068   for (k=0; k<merge->nrecv; k++) {
4069     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4070     nrows       = *(buf_ri_k[k]);
4071     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4072     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4073   }
4074 
4075   /* set values of ba */
4076   m = merge->rowmap->n;
4077   for (i=0; i<m; i++) {
4078     arow = owners[rank] + i;
4079     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4080     bnzi = bi[i+1] - bi[i];
4081     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4082 
4083     /* add local non-zero vals of this proc's seqmat into ba */
4084     anzi   = ai[arow+1] - ai[arow];
4085     aj     = a->j + ai[arow];
4086     aa     = a->a + ai[arow];
4087     nextaj = 0;
4088     for (j=0; nextaj<anzi; j++) {
4089       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4090         ba_i[j] += aa[nextaj++];
4091       }
4092     }
4093 
4094     /* add received vals into ba */
4095     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4096       /* i-th row */
4097       if (i == *nextrow[k]) {
4098         anzi   = *(nextai[k]+1) - *nextai[k];
4099         aj     = buf_rj[k] + *(nextai[k]);
4100         aa     = abuf_r[k] + *(nextai[k]);
4101         nextaj = 0;
4102         for (j=0; nextaj<anzi; j++) {
4103           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4104             ba_i[j] += aa[nextaj++];
4105           }
4106         }
4107         nextrow[k]++; nextai[k]++;
4108       }
4109     }
4110     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4111   }
4112   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4113   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4114 
4115   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4116   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4117   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4118   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4119   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4120   PetscFunctionReturn(0);
4121 }
4122 
4123 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4124 
4125 #undef __FUNCT__
4126 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4127 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4128 {
4129   PetscErrorCode      ierr;
4130   Mat                 B_mpi;
4131   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4132   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4133   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4134   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4135   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4136   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4137   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4138   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4139   MPI_Status          *status;
4140   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4141   PetscBT             lnkbt;
4142   Mat_Merge_SeqsToMPI *merge;
4143   PetscContainer      container;
4144 
4145   PetscFunctionBegin;
4146   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4147 
4148   /* make sure it is a PETSc comm */
4149   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4150   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4151   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4152 
4153   ierr = PetscNew(&merge);CHKERRQ(ierr);
4154   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4155 
4156   /* determine row ownership */
4157   /*---------------------------------------------------------*/
4158   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4159   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4160   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4161   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4162   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4163   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4164   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4165 
4166   m      = merge->rowmap->n;
4167   owners = merge->rowmap->range;
4168 
4169   /* determine the number of messages to send, their lengths */
4170   /*---------------------------------------------------------*/
4171   len_s = merge->len_s;
4172 
4173   len          = 0; /* length of buf_si[] */
4174   merge->nsend = 0;
4175   for (proc=0; proc<size; proc++) {
4176     len_si[proc] = 0;
4177     if (proc == rank) {
4178       len_s[proc] = 0;
4179     } else {
4180       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4181       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4182     }
4183     if (len_s[proc]) {
4184       merge->nsend++;
4185       nrows = 0;
4186       for (i=owners[proc]; i<owners[proc+1]; i++) {
4187         if (ai[i+1] > ai[i]) nrows++;
4188       }
4189       len_si[proc] = 2*(nrows+1);
4190       len         += len_si[proc];
4191     }
4192   }
4193 
4194   /* determine the number and length of messages to receive for ij-structure */
4195   /*-------------------------------------------------------------------------*/
4196   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4197   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4198 
4199   /* post the Irecv of j-structure */
4200   /*-------------------------------*/
4201   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4202   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4203 
4204   /* post the Isend of j-structure */
4205   /*--------------------------------*/
4206   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4207 
4208   for (proc=0, k=0; proc<size; proc++) {
4209     if (!len_s[proc]) continue;
4210     i    = owners[proc];
4211     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4212     k++;
4213   }
4214 
4215   /* receives and sends of j-structure are complete */
4216   /*------------------------------------------------*/
4217   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4218   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4219 
4220   /* send and recv i-structure */
4221   /*---------------------------*/
4222   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4223   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4224 
4225   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4226   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4227   for (proc=0,k=0; proc<size; proc++) {
4228     if (!len_s[proc]) continue;
4229     /* form outgoing message for i-structure:
4230          buf_si[0]:                 nrows to be sent
4231                [1:nrows]:           row index (global)
4232                [nrows+1:2*nrows+1]: i-structure index
4233     */
4234     /*-------------------------------------------*/
4235     nrows       = len_si[proc]/2 - 1;
4236     buf_si_i    = buf_si + nrows+1;
4237     buf_si[0]   = nrows;
4238     buf_si_i[0] = 0;
4239     nrows       = 0;
4240     for (i=owners[proc]; i<owners[proc+1]; i++) {
4241       anzi = ai[i+1] - ai[i];
4242       if (anzi) {
4243         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4244         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4245         nrows++;
4246       }
4247     }
4248     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4249     k++;
4250     buf_si += len_si[proc];
4251   }
4252 
4253   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4254   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4255 
4256   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4257   for (i=0; i<merge->nrecv; i++) {
4258     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4259   }
4260 
4261   ierr = PetscFree(len_si);CHKERRQ(ierr);
4262   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4263   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4264   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4265   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4266   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4267   ierr = PetscFree(status);CHKERRQ(ierr);
4268 
4269   /* compute a local seq matrix in each processor */
4270   /*----------------------------------------------*/
4271   /* allocate bi array and free space for accumulating nonzero column info */
4272   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4273   bi[0] = 0;
4274 
4275   /* create and initialize a linked list */
4276   nlnk = N+1;
4277   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4278 
4279   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4280   len  = ai[owners[rank+1]] - ai[owners[rank]];
4281   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4282 
4283   current_space = free_space;
4284 
4285   /* determine symbolic info for each local row */
4286   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4287 
4288   for (k=0; k<merge->nrecv; k++) {
4289     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4290     nrows       = *buf_ri_k[k];
4291     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4292     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4293   }
4294 
4295   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4296   len  = 0;
4297   for (i=0; i<m; i++) {
4298     bnzi = 0;
4299     /* add local non-zero cols of this proc's seqmat into lnk */
4300     arow  = owners[rank] + i;
4301     anzi  = ai[arow+1] - ai[arow];
4302     aj    = a->j + ai[arow];
4303     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4304     bnzi += nlnk;
4305     /* add received col data into lnk */
4306     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4307       if (i == *nextrow[k]) { /* i-th row */
4308         anzi  = *(nextai[k]+1) - *nextai[k];
4309         aj    = buf_rj[k] + *nextai[k];
4310         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4311         bnzi += nlnk;
4312         nextrow[k]++; nextai[k]++;
4313       }
4314     }
4315     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4316 
4317     /* if free space is not available, make more free space */
4318     if (current_space->local_remaining<bnzi) {
4319       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4320       nspacedouble++;
4321     }
4322     /* copy data into free space, then initialize lnk */
4323     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4324     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4325 
4326     current_space->array           += bnzi;
4327     current_space->local_used      += bnzi;
4328     current_space->local_remaining -= bnzi;
4329 
4330     bi[i+1] = bi[i] + bnzi;
4331   }
4332 
4333   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4334 
4335   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4336   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4337   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4338 
4339   /* create symbolic parallel matrix B_mpi */
4340   /*---------------------------------------*/
4341   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4342   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4343   if (n==PETSC_DECIDE) {
4344     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4345   } else {
4346     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4347   }
4348   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4349   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4350   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4351   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4352   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4353 
4354   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4355   B_mpi->assembled    = PETSC_FALSE;
4356   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4357   merge->bi           = bi;
4358   merge->bj           = bj;
4359   merge->buf_ri       = buf_ri;
4360   merge->buf_rj       = buf_rj;
4361   merge->coi          = NULL;
4362   merge->coj          = NULL;
4363   merge->owners_co    = NULL;
4364 
4365   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4366 
4367   /* attach the supporting struct to B_mpi for reuse */
4368   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4369   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4370   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4371   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4372   *mpimat = B_mpi;
4373 
4374   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4375   PetscFunctionReturn(0);
4376 }
4377 
4378 #undef __FUNCT__
4379 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4380 /*@C
4381       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4382                  matrices from each processor
4383 
4384     Collective on MPI_Comm
4385 
4386    Input Parameters:
4387 +    comm - the communicator the parallel matrix will live on
4388 .    seqmat - the input sequential matrix
4389 .    m - number of local rows (or PETSC_DECIDE)
4390 .    n - number of local columns (or PETSC_DECIDE)
4391 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4392 
4393    Output Parameter:
4394 .    mpimat - the parallel matrix generated
4395 
4396     Level: advanced
4397 
4398    Notes:
4399      The dimensions of the sequential matrix in each processor MUST be the same.
4400      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4401      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
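
   A minimal usage sketch (seqA is a SeqAIJ matrix of the same global size assembled on every
   process; comm and ierr come from the caller; error checking abbreviated):
.vb
     Mat A;

     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
     /* ... change the numerical values (not the nonzero pattern) of seqA ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
.ve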
4402 @*/
4403 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4404 {
4405   PetscErrorCode ierr;
4406   PetscMPIInt    size;
4407 
4408   PetscFunctionBegin;
4409   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4410   if (size == 1) {
4411     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4412     if (scall == MAT_INITIAL_MATRIX) {
4413       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4414     } else {
4415       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4416     }
4417     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4418     PetscFunctionReturn(0);
4419   }
4420   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4421   if (scall == MAT_INITIAL_MATRIX) {
4422     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4423   }
4424   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4425   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4426   PetscFunctionReturn(0);
4427 }
4428 
4429 #undef __FUNCT__
4430 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4431 /*@
4432      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4433           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4434           with MatGetSize()
4435 
4436     Not Collective
4437 
4438    Input Parameters:
4439 +    A - the matrix
4440 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4441 
4442    Output Parameter:
4443 .    A_loc - the local sequential matrix generated
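
   A minimal usage sketch (A is an assembled MPIAIJ matrix; error checking abbreviated):
.vb
     Mat Aloc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr); /* refresh the values after A changes */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve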
4444 
4445     Level: developer
4446 
4447 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4448 
4449 @*/
4450 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4451 {
4452   PetscErrorCode ierr;
4453   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4454   Mat_SeqAIJ     *mat,*a,*b;
4455   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4456   MatScalar      *aa,*ba,*cam;
4457   PetscScalar    *ca;
4458   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4459   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4460   PetscBool      match;
4461   MPI_Comm       comm;
4462   PetscMPIInt    size;
4463 
4464   PetscFunctionBegin;
4465   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4466   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4467   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4468   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4469   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4470 
4471   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4472   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4473   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4474   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4475   aa = a->a; ba = b->a;
4476   if (scall == MAT_INITIAL_MATRIX) {
4477     if (size == 1) {
4478       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4479       PetscFunctionReturn(0);
4480     }
4481 
4482     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4483     ci[0] = 0;
4484     for (i=0; i<am; i++) {
4485       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4486     }
4487     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4488     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4489     k    = 0;
4490     for (i=0; i<am; i++) {
4491       ncols_o = bi[i+1] - bi[i];
4492       ncols_d = ai[i+1] - ai[i];
4493       /* off-diagonal portion of A */
4494       for (jo=0; jo<ncols_o; jo++) {
4495         col = cmap[*bj];
4496         if (col >= cstart) break;
4497         cj[k]   = col; bj++;
4498         ca[k++] = *ba++;
4499       }
4500       /* diagonal portion of A */
4501       for (j=0; j<ncols_d; j++) {
4502         cj[k]   = cstart + *aj++;
4503         ca[k++] = *aa++;
4504       }
4505       /* off-diagonal portion of A */
4506       for (j=jo; j<ncols_o; j++) {
4507         cj[k]   = cmap[*bj++];
4508         ca[k++] = *ba++;
4509       }
4510     }
4511     /* put together the new matrix */
4512     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4513     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4514     /* Since these are PETSc arrays, change flags to free them as necessary. */
4515     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4516     mat->free_a  = PETSC_TRUE;
4517     mat->free_ij = PETSC_TRUE;
4518     mat->nonew   = 0;
4519   } else if (scall == MAT_REUSE_MATRIX) {
4520     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4521     ci = mat->i; cj = mat->j; cam = mat->a;
4522     for (i=0; i<am; i++) {
4523       /* off-diagonal portion of A */
4524       ncols_o = bi[i+1] - bi[i];
4525       for (jo=0; jo<ncols_o; jo++) {
4526         col = cmap[*bj];
4527         if (col >= cstart) break;
4528         *cam++ = *ba++; bj++;
4529       }
4530       /* diagonal portion of A */
4531       ncols_d = ai[i+1] - ai[i];
4532       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4533       /* off-diagonal portion of A */
4534       for (j=jo; j<ncols_o; j++) {
4535         *cam++ = *ba++; bj++;
4536       }
4537     }
4538   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4539   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4540   PetscFunctionReturn(0);
4541 }
4542 
4543 #undef __FUNCT__
4544 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4545 /*@C
4546      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4547 
4548     Not Collective
4549 
4550    Input Parameters:
4551 +    A - the matrix
4552 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4553 -    row, col - index sets of rows and columns to extract (or NULL)
4554 
4555    Output Parameter:
4556 .    A_loc - the local sequential matrix generated
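
   A minimal usage sketch, passing NULL for row and col to take all local rows and all nonzero
   columns (error checking abbreviated):
.vb
     Mat Aloc;

     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc ... */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve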
4557 
4558     Level: developer
4559 
4560 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4561 
4562 @*/
4563 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4564 {
4565   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4566   PetscErrorCode ierr;
4567   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4568   IS             isrowa,iscola;
4569   Mat            *aloc;
4570   PetscBool      match;
4571 
4572   PetscFunctionBegin;
4573   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4574   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4575   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4576   if (!row) {
4577     start = A->rmap->rstart; end = A->rmap->rend;
4578     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4579   } else {
4580     isrowa = *row;
4581   }
4582   if (!col) {
4583     start = A->cmap->rstart;
4584     cmap  = a->garray;
4585     nzA   = a->A->cmap->n;
4586     nzB   = a->B->cmap->n;
4587     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4588     ncols = 0;
4589     for (i=0; i<nzB; i++) {
4590       if (cmap[i] < start) idx[ncols++] = cmap[i];
4591       else break;
4592     }
4593     imark = i;
4594     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4595     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4596     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4597   } else {
4598     iscola = *col;
4599   }
4600   if (scall != MAT_INITIAL_MATRIX) {
4601     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4602     aloc[0] = *A_loc;
4603   }
4604   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4605   *A_loc = aloc[0];
4606   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4607   if (!row) {
4608     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4609   }
4610   if (!col) {
4611     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4612   }
4613   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4614   PetscFunctionReturn(0);
4615 }
4616 
4617 #undef __FUNCT__
4618 #define __FUNCT__ "MatGetBrowsOfAcols"
4619 /*@C
4620     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4621 
4622     Collective on Mat
4623 
4624    Input Parameters:
4625 +    A,B - the matrices in mpiaij format
4626 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4627 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4628 
4629    Output Parameter:
4630 +    rowb, colb - index sets of rows and columns of B to extract
4631 -    B_seq - the sequential matrix generated
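
   A minimal usage sketch; the index sets created by the MAT_INITIAL_MATRIX call are kept and
   reused in the MAT_REUSE_MATRIX call (error checking abbreviated):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* ... after the numerical values (not the nonzero pattern) of B change ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* destroy rowb, colb and Bseq with ISDestroy()/MatDestroy() when no longer needed */
.ve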
4632 
4633     Level: developer
4634 
4635 @*/
4636 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4637 {
4638   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4639   PetscErrorCode ierr;
4640   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4641   IS             isrowb,iscolb;
4642   Mat            *bseq=NULL;
4643 
4644   PetscFunctionBegin;
4645   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4646     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4647   }
4648   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4649 
4650   if (scall == MAT_INITIAL_MATRIX) {
4651     start = A->cmap->rstart;
4652     cmap  = a->garray;
4653     nzA   = a->A->cmap->n;
4654     nzB   = a->B->cmap->n;
4655     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4656     ncols = 0;
4657     for (i=0; i<nzB; i++) {  /* row < local row index */
4658       if (cmap[i] < start) idx[ncols++] = cmap[i];
4659       else break;
4660     }
4661     imark = i;
4662     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4663     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4664     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4665     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4666   } else {
4667     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4668     isrowb  = *rowb; iscolb = *colb;
4669     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4670     bseq[0] = *B_seq;
4671   }
4672   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4673   *B_seq = bseq[0];
4674   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4675   if (!rowb) {
4676     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4677   } else {
4678     *rowb = isrowb;
4679   }
4680   if (!colb) {
4681     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4682   } else {
4683     *colb = iscolb;
4684   }
4685   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4686   PetscFunctionReturn(0);
4687 }
4688 
4689 #undef __FUNCT__
4690 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4691 /*
4692     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4693     of the OFF-DIAGONAL portion of local A
4694 
4695     Collective on Mat
4696 
4697    Input Parameters:
4698 +    A,B - the matrices in mpiaij format
4699 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4700 
4701    Output Parameter:
4702 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4703 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4704 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4705 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4706 
4707     Level: developer
4708 
4709 */
4710 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4711 {
4712   VecScatter_MPI_General *gen_to,*gen_from;
4713   PetscErrorCode         ierr;
4714   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4715   Mat_SeqAIJ             *b_oth;
4716   VecScatter             ctx =a->Mvctx;
4717   MPI_Comm               comm;
4718   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4719   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4720   PetscScalar            *rvalues,*svalues;
4721   MatScalar              *b_otha,*bufa,*bufA;
4722   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4723   MPI_Request            *rwaits = NULL,*swaits = NULL;
4724   MPI_Status             *sstatus,rstatus;
4725   PetscMPIInt            jj,size;
4726   PetscInt               *cols,sbs,rbs;
4727   PetscScalar            *vals;
4728 
4729   PetscFunctionBegin;
4730   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4731   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4732 
4733   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4734     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4735   }
4736   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4737   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4738 
4739   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4740   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4741   rvalues  = gen_from->values; /* holds the length of receiving row */
4742   svalues  = gen_to->values;   /* holds the length of sending row */
4743   nrecvs   = gen_from->n;
4744   nsends   = gen_to->n;
4745 
4746   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4747   srow    = gen_to->indices;    /* local row index to be sent */
4748   sstarts = gen_to->starts;
4749   sprocs  = gen_to->procs;
4750   sstatus = gen_to->sstatus;
4751   sbs     = gen_to->bs;
4752   rstarts = gen_from->starts;
4753   rprocs  = gen_from->procs;
4754   rbs     = gen_from->bs;
4755 
4756   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4757   if (scall == MAT_INITIAL_MATRIX) {
4758     /* i-array */
4759     /*---------*/
4760     /*  post receives */
4761     for (i=0; i<nrecvs; i++) {
4762       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4763       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4764       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4765     }
4766 
4767     /* pack the outgoing message */
4768     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4769 
4770     sstartsj[0] = 0;
4771     rstartsj[0] = 0;
4772     len         = 0; /* total length of j or a array to be sent */
4773     k           = 0;
4774     for (i=0; i<nsends; i++) {
4775       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4776       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4777       for (j=0; j<nrows; j++) {
4778         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4779         for (l=0; l<sbs; l++) {
4780           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4781 
4782           rowlen[j*sbs+l] = ncols;
4783 
4784           len += ncols;
4785           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4786         }
4787         k++;
4788       }
4789       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4790 
4791       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4792     }
4793     /* recvs and sends of i-array are completed */
4794     i = nrecvs;
4795     while (i--) {
4796       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4797     }
4798     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4799 
4800     /* allocate buffers for sending j and a arrays */
4801     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4802     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4803 
4804     /* create i-array of B_oth */
4805     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4806 
4807     b_othi[0] = 0;
4808     len       = 0; /* total length of j or a array to be received */
4809     k         = 0;
4810     for (i=0; i<nrecvs; i++) {
4811       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4812       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4813       for (j=0; j<nrows; j++) {
4814         b_othi[k+1] = b_othi[k] + rowlen[j];
4815         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4816         k++;
4817       }
4818       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4819     }
4820 
4821     /* allocate space for j and a arrays of B_oth */
4822     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4823     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4824 
4825     /* j-array */
4826     /*---------*/
4827     /*  post receives of j-array */
4828     for (i=0; i<nrecvs; i++) {
4829       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4830       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4831     }
4832 
4833     /* pack the outgoing message j-array */
4834     k = 0;
4835     for (i=0; i<nsends; i++) {
4836       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4837       bufJ  = bufj+sstartsj[i];
4838       for (j=0; j<nrows; j++) {
4839         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4840         for (ll=0; ll<sbs; ll++) {
4841           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4842           for (l=0; l<ncols; l++) {
4843             *bufJ++ = cols[l];
4844           }
4845           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4846         }
4847       }
4848       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4849     }
4850 
4851     /* recvs and sends of j-array are completed */
4852     i = nrecvs;
4853     while (i--) {
4854       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4855     }
4856     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4857   } else if (scall == MAT_REUSE_MATRIX) {
4858     sstartsj = *startsj_s;
4859     rstartsj = *startsj_r;
4860     bufa     = *bufa_ptr;
4861     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4862     b_otha   = b_oth->a;
4863   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4864 
4865   /* a-array */
4866   /*---------*/
4867   /*  post receives of a-array */
4868   for (i=0; i<nrecvs; i++) {
4869     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4870     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4871   }
4872 
4873   /* pack the outgoing message a-array */
4874   k = 0;
4875   for (i=0; i<nsends; i++) {
4876     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4877     bufA  = bufa+sstartsj[i];
4878     for (j=0; j<nrows; j++) {
4879       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4880       for (ll=0; ll<sbs; ll++) {
4881         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4882         for (l=0; l<ncols; l++) {
4883           *bufA++ = vals[l];
4884         }
4885         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4886       }
4887     }
4888     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4889   }
4890   /* recvs and sends of a-array are completed */
4891   i = nrecvs;
4892   while (i--) {
4893     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4894   }
4895   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4896   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4897 
4898   if (scall == MAT_INITIAL_MATRIX) {
4899     /* put together the new matrix */
4900     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4901 
4902     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4903     /* Since these are PETSc arrays, change flags to free them as necessary. */
4904     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4905     b_oth->free_a  = PETSC_TRUE;
4906     b_oth->free_ij = PETSC_TRUE;
4907     b_oth->nonew   = 0;
4908 
4909     ierr = PetscFree(bufj);CHKERRQ(ierr);
4910     if (!startsj_s || !bufa_ptr) {
4911       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4912       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4913     } else {
4914       *startsj_s = sstartsj;
4915       *startsj_r = rstartsj;
4916       *bufa_ptr  = bufa;
4917     }
4918   }
4919   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4920   PetscFunctionReturn(0);
4921 }
4922 
4923 #undef __FUNCT__
4924 #define __FUNCT__ "MatGetCommunicationStructs"
4925 /*@C
4926   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4927 
4928   Not Collective
4929 
4930   Input Parameters:
4931 . A - The matrix in mpiaij format
4932 
4933   Output Parameter:
4934 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4935 . colmap - A map from global column index to local index into lvec
4936 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4937 
4938   Level: developer
4939 
4940 @*/
4941 #if defined(PETSC_USE_CTABLE)
4942 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4943 #else
4944 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4945 #endif
4946 {
4947   Mat_MPIAIJ *a;
4948 
4949   PetscFunctionBegin;
4950   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4951   PetscValidPointer(lvec, 2);
4952   PetscValidPointer(colmap, 3);
4953   PetscValidPointer(multScatter, 4);
4954   a = (Mat_MPIAIJ*) A->data;
4955   if (lvec) *lvec = a->lvec;
4956   if (colmap) *colmap = a->colmap;
4957   if (multScatter) *multScatter = a->Mvctx;
4958   PetscFunctionReturn(0);
4959 }
4960 
4961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4963 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4964 #if defined(PETSC_HAVE_ELEMENTAL)
4965 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4966 #endif
4967 
4968 #undef __FUNCT__
4969 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4970 /*
4971     Computes (B'*A')' since computing B*A directly is untenable
4972 
4973                n                       p                          p
4974         (              )       (              )         (                  )
4975       m (      A       )  *  n (       B      )   =   m (         C        )
4976         (              )       (              )         (                  )
4977 
4978 */
4979 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4980 {
4981   PetscErrorCode ierr;
4982   Mat            At,Bt,Ct;
4983 
4984   PetscFunctionBegin;
4985   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4986   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4987   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4988   ierr = MatDestroy(&At);CHKERRQ(ierr);
4989   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4990   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4991   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 #undef __FUNCT__
4996 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4997 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4998 {
4999   PetscErrorCode ierr;
5000   PetscInt       m=A->rmap->n,n=B->cmap->n;
5001   Mat            Cmat;
5002 
5003   PetscFunctionBegin;
5004   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5005   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5006   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5007   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5008   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5009   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5010   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5011   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5012 
5013   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5014 
5015   *C = Cmat;
5016   PetscFunctionReturn(0);
5017 }
5018 
5019 /* ----------------------------------------------------------------*/
5020 #undef __FUNCT__
5021 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5022 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5023 {
5024   PetscErrorCode ierr;
5025 
5026   PetscFunctionBegin;
5027   if (scall == MAT_INITIAL_MATRIX) {
5028     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5029     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5030     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5031   }
5032   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5033   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5034   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5035   PetscFunctionReturn(0);
5036 }
5037 
5038 /*MC
5039    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5040 
5041    Options Database Keys:
5042 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5043 
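  A minimal sketch of selecting this type from the options database (the communicator, sizes,
  and preallocation values are placeholders; error checking abbreviated):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                 /* -mat_type mpiaij selects this type */
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
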
5044   Level: beginner
5045 
5046 .seealso: MatCreateAIJ()
5047 M*/
5048 
5049 #undef __FUNCT__
5050 #define __FUNCT__ "MatCreate_MPIAIJ"
5051 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5052 {
5053   Mat_MPIAIJ     *b;
5054   PetscErrorCode ierr;
5055   PetscMPIInt    size;
5056 
5057   PetscFunctionBegin;
5058   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5059 
5060   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5061   B->data       = (void*)b;
5062   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5063   B->assembled  = PETSC_FALSE;
5064   B->insertmode = NOT_SET_VALUES;
5065   b->size       = size;
5066 
5067   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5068 
5069   /* build cache for off array entries formed */
5070   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5071 
5072   b->donotstash  = PETSC_FALSE;
5073   b->colmap      = 0;
5074   b->garray      = 0;
5075   b->roworiented = PETSC_TRUE;
5076 
5077   /* stuff used for matrix vector multiply */
5078   b->lvec  = NULL;
5079   b->Mvctx = NULL;
5080 
5081   /* stuff for MatGetRow() */
5082   b->rowindices   = 0;
5083   b->rowvalues    = 0;
5084   b->getrowactive = PETSC_FALSE;
5085 
5086   /* flexible pointer used in CUSP/CUSPARSE classes */
5087   b->spptr = NULL;
5088 
5089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5100 #if defined(PETSC_HAVE_ELEMENTAL)
5101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5102 #endif
5103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5106   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5107   PetscFunctionReturn(0);
5108 }
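/*
   The "*_C" strings composed above form the run-time method table for this type.  A rough
   sketch (illustrative only; A, d_nz, d_nnz, o_nz, and o_nnz are placeholder arguments) of
   how a generic interface routine such as MatMPIAIJSetPreallocation() dispatches through it:

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]) = NULL;

     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&f);CHKERRQ(ierr);
     if (f) {ierr = (*f)(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);}
*/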
5109 
5110 #undef __FUNCT__
5111 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5112 /*@C
5113      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5114          and "off-diagonal" part of the matrix in CSR format.
5115 
5116    Collective on MPI_Comm
5117 
5118    Input Parameters:
5119 +  comm - MPI communicator
5120 .  m - number of local rows (cannot be PETSC_DECIDE)
5121 .  n - This value should be the same as the local size used in creating the
5122        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5123        calculated if N is given). For square matrices n is almost always m.
5124 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5125 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5126 .  i - row indices for the "diagonal" portion of the matrix
5127 .  j - column indices for the "diagonal" portion of the matrix
5128 .  a - matrix values for the "diagonal" portion of the matrix
5129 .  oi - row indices for the "off-diagonal" portion of the matrix
5130 .  oj - column indices for the "off-diagonal" portion of the matrix
5131 -  oa - matrix values for the "off-diagonal" portion of the matrix
5132 
5133    Output Parameter:
5134 .   mat - the matrix
5135 
5136    Level: advanced
5137 
5138    Notes:
5139        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5140        must free the arrays once the matrix has been destroyed and not before.
5141 
5142        The i and j indices are zero-based.
5143 
5144        See MatCreateAIJ() for the definitions of the "diagonal" and "off-diagonal" portions of the matrix.
5145 
5146        This sets local rows and cannot be used to set off-processor values.
5147 
5148        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5149        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5150        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5151        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5152        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5153        communication if it is known that only local entries will be set.
5154 
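       Example usage (an illustrative sketch): assembling the 2 x 2 matrix [2 -1; -1 2]
       distributed one row per process over exactly two MPI ranks, where j holds column indices
       local to the "diagonal" block and oj holds global column indices:

.vb
       PetscInt       i[2]  = {0,1}, oi[2] = {0,1};
       PetscInt       j[1]  = {0},   oj[1];
       PetscScalar    a[1]  = {2.0}, oa[1] = {-1.0};
       PetscMPIInt    rank;
       PetscErrorCode ierr;
       Mat            A;

       ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       oj[0] = rank ? 0 : 1;
       ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       ierr  = MatDestroy(&A);CHKERRQ(ierr);
.ve
       The six arrays may be freed, or go out of scope, only after the MatDestroy() call.
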
5155 .keywords: matrix, aij, compressed row, sparse, parallel
5156 
5157 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5158           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5159 @*/
5160 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5161 {
5162   PetscErrorCode ierr;
5163   Mat_MPIAIJ     *maij;
5164 
5165   PetscFunctionBegin;
5166   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5167   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5168   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5169   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5170   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5171   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5172   maij = (Mat_MPIAIJ*) (*mat)->data;
5173 
5174   (*mat)->preallocated = PETSC_TRUE;
5175 
5176   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5177   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5178 
5179   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5180   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5181 
5182   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5183   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5184   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5185   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5186 
5187   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5188   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5189   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5190   PetscFunctionReturn(0);
5191 }
5192 
5193 /*
5194     Special version of MatSetValues() for direct calls from Fortran, bypassing the generic Fortran interface
5195 */
5196 #include <petsc/private/fortranimpl.h>
5197 
5198 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5199 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5201 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5202 #endif
5203 
5204 /* Change these macros so they can be used in a void function: no error code can be returned, so abort on error instead */
5205 #undef CHKERRQ
5206 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5207 #undef SETERRQ2
5208 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5209 #undef SETERRQ3
5210 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5211 #undef SETERRQ
5212 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5213 
5214 #undef __FUNCT__
5215 #define __FUNCT__ "matsetvaluesmpiaij_"
5216 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5217 {
5218   Mat            mat  = *mmat;
5219   PetscInt       m    = *mm, n = *mn;
5220   InsertMode     addv = *maddv;
5221   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5222   PetscScalar    value;
5223   PetscErrorCode ierr;
5224 
5225   MatCheckPreallocated(mat,1);
5226   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5227 
5228 #if defined(PETSC_USE_DEBUG)
5229   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5230 #endif
5231   {
5232     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5233     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5234     PetscBool roworiented = aij->roworiented;
5235 
5236     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5237     Mat        A                 = aij->A;
5238     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5239     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5240     MatScalar  *aa               = a->a;
5241     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5242     Mat        B                 = aij->B;
5243     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5244     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5245     MatScalar  *ba               = b->a;
5246 
5247     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5248     PetscInt  nonew = a->nonew;
5249     MatScalar *ap1,*ap2;
5250 
5251     PetscFunctionBegin;
5252     for (i=0; i<m; i++) {
5253       if (im[i] < 0) continue;
5254 #if defined(PETSC_USE_DEBUG)
5255       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5256 #endif
5257       if (im[i] >= rstart && im[i] < rend) {
5258         row      = im[i] - rstart;
5259         lastcol1 = -1;
5260         rp1      = aj + ai[row];
5261         ap1      = aa + ai[row];
5262         rmax1    = aimax[row];
5263         nrow1    = ailen[row];
5264         low1     = 0;
5265         high1    = nrow1;
5266         lastcol2 = -1;
5267         rp2      = bj + bi[row];
5268         ap2      = ba + bi[row];
5269         rmax2    = bimax[row];
5270         nrow2    = bilen[row];
5271         low2     = 0;
5272         high2    = nrow2;
5273 
5274         for (j=0; j<n; j++) {
5275           if (roworiented) value = v[i*n+j];
5276           else value = v[i+j*m];
5277           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5278           if (in[j] >= cstart && in[j] < cend) {
5279             col = in[j] - cstart;
5280             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5281           } else if (in[j] < 0) continue;
5282 #if defined(PETSC_USE_DEBUG)
5283           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5284 #endif
5285           else {
5286             if (mat->was_assembled) {
5287               if (!aij->colmap) {
5288                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5289               }
5290 #if defined(PETSC_USE_CTABLE)
5291               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5292               col--;
5293 #else
5294               col = aij->colmap[in[j]] - 1;
5295 #endif
5296               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5297                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5298                 col  =  in[j];
5299                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5300                 B     = aij->B;
5301                 b     = (Mat_SeqAIJ*)B->data;
5302                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5303                 ba    = b->a;    /* refresh ba before computing ap2: MatDisAssemble_MPIAIJ() replaced B's arrays */
5304                 rp2   = bj + bi[row];
5305                 ap2   = ba + bi[row];
5306                 rmax2 = bimax[row];
5307                 nrow2 = bilen[row];
5308                 low2  = 0;
5309                 high2 = nrow2;
5310                 bm    = aij->B->rmap->n;
5311               }
5312             } else col = in[j];
5313             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5314           }
5315         }
5316       } else if (!aij->donotstash) {
5317         if (roworiented) {
5318           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5319         } else {
5320           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5321         }
5322       }
5323     }
5324   }
5325   PetscFunctionReturnVoid();
5326 }
5327 
5328