xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3bf036e263fd78807e2931ff42d9ddcd8aae3fd4)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity; a usage sketch follows this documentation block.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically
18    switches over to using inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
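
/*
   Illustrative usage sketch (added for exposition; not part of the original source, hence guarded out).
   It creates an AIJ matrix and calls both preallocation routines as recommended above, so the same code
   works whether the communicator holds one process or several.  The function name, sizes and nonzero
   counts below are hypothetical.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);   /* allows -mat_type aij (or a subclass) to override */
  /* only the variant matching the communicator size takes effect; calling both is harmless */
  ierr = MatSeqAIJSetPreallocation(*A,5,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif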
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
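  /* first pass: count the local rows whose diagonal (A) and off-diagonal (B) parts contain no nonzero values */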
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
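  /* second pass: record the global indices (rstart + i) of the rows that are kept */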
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
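  /* accumulate local contributions per global column: columns of the diagonal block A are offset by A->cmap->rstart, columns of the off-diagonal block B map to global indices through garray */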
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices.
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
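    /* convert the gathered per-process row counts into global row ownership offsets (prefix sum) */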
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal nonzeros in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
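      /* ld[i] counts the entries of local row i that lie to the left of this process's diagonal block; olens[i] counts all entries outside the diagonal block */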
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal nonzeros in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0 */
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it it is not scalable (each processor
353 has an order N integer array) but access is fast.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
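  /* indices are stored shifted by +1 so that a looked-up value of 0 (left by PetscMemzero()) means the global column is not present in the off-diagonal block; the PetscTable branch above uses the same +1 convention */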
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
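/*
   The two macros below insert (row,col,value) into the local diagonal (A) and off-diagonal (B) SeqAIJ
   blocks respectively: a short binary search narrows down the position of col within the row, an existing
   entry is added to or overwritten according to addv, and a genuinely new entry shifts the remainder of
   the row up (reallocating via MatSeqXAIJReallocateAIJ() if necessary) unless new nonzeros are disallowed
   through nonew.
*/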
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
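  /* v is assumed to hold the entire local row in global column order: the off-diagonal entries to the left of the diagonal block (stored in B), then the diagonal block entries (stored in A), then the remaining off-diagonal entries (also stored in B) */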
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
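  /* rows owned by this process are inserted directly into the local diagonal (A) or off-diagonal (B) blocks; rows owned by other processes are stashed and communicated during MatAssemblyBegin()/MatAssemblyEnd() */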
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
643   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled, if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if the nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled, and thus we can skip this step
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
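  /* overall flow: determine which process owns each requested row, send the row indices to their owners, receive the locally owned rows (lrows), then zero those rows in the local A and B blocks and optionally fix the right-hand side */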
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal we use separate
864      code so that MatSetValues() is not called for each diagonal entry, which would allocate
865      new memory, causing lots of mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
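  /* build a 0/1 mask over the zeroed rows and scatter it (and optionally x) into the ghosted layout, so that columns of the off-diagonal block B that correspond to rows zeroed on other processes can be zeroed here and, if b is given, the right-hand side corrected */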
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all entries of the off-diagonal block, zeroing the removed columns */
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
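  /* when the scatter is 'merged' the reverse communication completes inside VecScatterBegin(), so the local transpose product must already be in yy before Begin() is called and VecScatterEnd() is effectively a no-op */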
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163   /* added in yy until the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: check that the diagonal blocks are transposes of each other */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217    PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* the 0th processor needs as much space as the largest amount any processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
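/*
   The binary layout written above is, in order: a four-entry header
   (MAT_FILE_CLASSID, global rows, global columns, total number of nonzeros),
   then all row lengths, then all global column indices, then all values,
   each gathered to process 0 under flow control.  A minimal sketch of writing
   and re-reading such a file (the viewer variable and the file name "A.dat"
   are illustrative only):

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
      ierr = MatLoad(B,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/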
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscBool  inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call MatView() to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
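/*
   For ASCII, draw, and socket viewers on more than one process the routine above
   gathers the entire matrix onto process 0 before viewing it; this is why matrices
   with more than 1024 rows are rejected unless -mat_ascii_output_large is given.
   A minimal usage sketch for a small parallel matrix (illustrative only):

      ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/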
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
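/*
   The SOR above is processor-local (block Jacobi between processes): each outer
   iteration scatters the current solution into the ghost vector lvec, forms
   bb1 = bb - B*lvec so the off-process coupling enters through the right-hand side,
   and then performs the requested sweep on the local diagonal block A only.  A true
   global SOR is not supported, as the final SETERRQ indicates.  A minimal sketch of
   selecting this through KSP/PC (the ksp and pc variables are illustrative):

      ierr = KSPSetType(ksp,KSPRICHARDSON);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
*/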
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   Mat            aA,aB,Aperm;
1725   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1726   PetscScalar    *aa,*ba;
1727   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1728   PetscSF        rowsf,sf;
1729   IS             parcolp = PETSC_NULL;
1730   PetscBool      done;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1735   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1736   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1737   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1738 
1739   /* Invert row permutation to find out where my rows should go */
1740   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1741   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1742   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1743   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1744   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1745   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1746 
1747   /* Invert column permutation to find out where my columns should go */
1748   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1749   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1750   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1751   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1752   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1753   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1754   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1755 
1756   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1757   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1758   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1759 
1760   /* Find out where my gcols should go */
1761   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1762   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1763   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1764   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1765   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1766   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1767   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1768   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1769 
1770   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1771   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1772   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1773   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1774   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1775   for (i=0; i<m; i++) {
1776     PetscInt row = rdest[i],rowner;
1777     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1778     for (j=ai[i]; j<ai[i+1]; j++) {
1779       PetscInt cowner,col = cdest[aj[j]];
1780       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1781       if (rowner == cowner) dnnz[i]++;
1782       else onnz[i]++;
1783     }
1784     for (j=bi[i]; j<bi[i+1]; j++) {
1785       PetscInt cowner,col = gcdest[bj[j]];
1786       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1787       if (rowner == cowner) dnnz[i]++;
1788       else onnz[i]++;
1789     }
1790   }
1791   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1792   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1793   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1794   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1795   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1796   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1797   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1798 
1799   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1800   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1801   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1802   for (i=0; i<m; i++) {
1803     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1804     PetscInt rowlen;
1805     rowlen = ai[i+1] - ai[i];
1806     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1807     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1808     rowlen = bi[i+1] - bi[i];
1809     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1810     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1811   }
1812   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1813   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1814   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1815   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1816   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1817   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1818   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1819   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1820   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1821   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1822   *B = Aperm;
1823   PetscFunctionReturn(0);
1824 }
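/*
   The permutation above is inverted with PetscSF reductions: each process scatters
   its own global row/column numbers to the owners of the permuted positions
   (rdest/cdest), broadcasts the destinations of its ghost columns (gcdest), counts
   the diagonal and off-diagonal nonzeros each permuted row will receive so the result
   can be preallocated, and finally inserts the rows of A and B at their new locations.
   A minimal usage sketch (the index sets rowp and colp are illustrative):

      ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
*/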
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1828 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1829 {
1830   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1831   Mat            A = mat->A,B = mat->B;
1832   PetscErrorCode ierr;
1833   PetscReal      isend[5],irecv[5];
1834 
1835   PetscFunctionBegin;
1836   info->block_size     = 1.0;
1837   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1838   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1839   isend[3] = info->memory;  isend[4] = info->mallocs;
1840   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1841   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1842   isend[3] += info->memory;  isend[4] += info->mallocs;
1843   if (flag == MAT_LOCAL) {
1844     info->nz_used      = isend[0];
1845     info->nz_allocated = isend[1];
1846     info->nz_unneeded  = isend[2];
1847     info->memory       = isend[3];
1848     info->mallocs      = isend[4];
1849   } else if (flag == MAT_GLOBAL_MAX) {
1850     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1851     info->nz_used      = irecv[0];
1852     info->nz_allocated = irecv[1];
1853     info->nz_unneeded  = irecv[2];
1854     info->memory       = irecv[3];
1855     info->mallocs      = irecv[4];
1856   } else if (flag == MAT_GLOBAL_SUM) {
1857     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1858     info->nz_used      = irecv[0];
1859     info->nz_allocated = irecv[1];
1860     info->nz_unneeded  = irecv[2];
1861     info->memory       = irecv[3];
1862     info->mallocs      = irecv[4];
1863   }
1864   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1865   info->fill_ratio_needed = 0;
1866   info->factor_mallocs    = 0;
1867   PetscFunctionReturn(0);
1868 }
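/*
   MatGetInfo above sums the statistics of the diagonal (A) and off-diagonal (B)
   blocks and, for MAT_GLOBAL_MAX or MAT_GLOBAL_SUM, reduces the five fields
   (nz_used, nz_allocated, nz_unneeded, memory, mallocs) across the communicator.
   A minimal sketch of querying an assembled matrix (illustrative only):

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/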
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatSetOption_MPIAIJ"
1872 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1873 {
1874   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1875   PetscErrorCode ierr;
1876 
1877   PetscFunctionBegin;
1878   switch (op) {
1879   case MAT_NEW_NONZERO_LOCATIONS:
1880   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1881   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1882   case MAT_KEEP_NONZERO_PATTERN:
1883   case MAT_NEW_NONZERO_LOCATION_ERR:
1884   case MAT_USE_INODES:
1885   case MAT_IGNORE_ZERO_ENTRIES:
1886     MatCheckPreallocated(A,1);
1887     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1888     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1889     break;
1890   case MAT_ROW_ORIENTED:
1891     a->roworiented = flg;
1892     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1893     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1894     break;
1895   case MAT_NEW_DIAGONALS:
1896     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1897     break;
1898   case MAT_IGNORE_OFF_PROC_ENTRIES:
1899     a->donotstash = flg;
1900     break;
1901   case MAT_SPD:
1902     A->spd_set                         = PETSC_TRUE;
1903     A->spd                             = flg;
1904     if (flg) {
1905       A->symmetric                     = PETSC_TRUE;
1906       A->structurally_symmetric        = PETSC_TRUE;
1907       A->symmetric_set                 = PETSC_TRUE;
1908       A->structurally_symmetric_set    = PETSC_TRUE;
1909     }
1910     break;
1911   case MAT_SYMMETRIC:
1912     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1913     break;
1914   case MAT_STRUCTURALLY_SYMMETRIC:
1915     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1916     break;
1917   case MAT_HERMITIAN:
1918     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1919     break;
1920   case MAT_SYMMETRY_ETERNAL:
1921     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1922     break;
1923   default:
1924     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1925   }
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatGetRow_MPIAIJ"
1931 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1932 {
1933   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1934   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1935   PetscErrorCode ierr;
1936   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1937   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1938   PetscInt       *cmap,*idx_p;
1939 
1940   PetscFunctionBegin;
1941   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1942   mat->getrowactive = PETSC_TRUE;
1943 
1944   if (!mat->rowvalues && (idx || v)) {
1945     /*
1946         allocate enough space to hold information from the longest row.
1947     */
1948     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1949     PetscInt   max = 1,tmp;
1950     for (i=0; i<matin->rmap->n; i++) {
1951       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1952       if (max < tmp) { max = tmp; }
1953     }
1954     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1955   }
1956 
1957   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1958   lrow = row - rstart;
1959 
1960   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1961   if (!v)   {pvA = 0; pvB = 0;}
1962   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1963   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1964   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1965   nztot = nzA + nzB;
1966 
1967   cmap  = mat->garray;
1968   if (v  || idx) {
1969     if (nztot) {
1970       /* Sort by increasing column numbers, assuming A and B already sorted */
1971       PetscInt imark = -1;
1972       if (v) {
1973         *v = v_p = mat->rowvalues;
1974         for (i=0; i<nzB; i++) {
1975           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1976           else break;
1977         }
1978         imark = i;
1979         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1980         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1981       }
1982       if (idx) {
1983         *idx = idx_p = mat->rowindices;
1984         if (imark > -1) {
1985           for (i=0; i<imark; i++) {
1986             idx_p[i] = cmap[cworkB[i]];
1987           }
1988         } else {
1989           for (i=0; i<nzB; i++) {
1990             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1991             else break;
1992           }
1993           imark = i;
1994         }
1995         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1996         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1997       }
1998     } else {
1999       if (idx) *idx = 0;
2000       if (v)   *v   = 0;
2001     }
2002   }
2003   *nz = nztot;
2004   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2005   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2006   PetscFunctionReturn(0);
2007 }
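/*
   MatGetRow above merges the local row of A (diagonal block) with the local row of B
   (off-diagonal block, with columns mapped through garray) so the returned column
   indices are global and increasing; only locally owned rows may be requested.
   A minimal usage sketch (the variable row is any locally owned row, illustrative):

      const PetscInt    *cols;
      const PetscScalar *vals;
      PetscInt          ncols;
      ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/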
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2011 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2012 {
2013   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2014 
2015   PetscFunctionBegin;
2016   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2017   aij->getrowactive = PETSC_FALSE;
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatNorm_MPIAIJ"
2023 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2024 {
2025   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2026   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2027   PetscErrorCode ierr;
2028   PetscInt       i,j,cstart = mat->cmap->rstart;
2029   PetscReal      sum = 0.0;
2030   MatScalar      *v;
2031 
2032   PetscFunctionBegin;
2033   if (aij->size == 1) {
2034     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2035   } else {
2036     if (type == NORM_FROBENIUS) {
2037       v = amat->a;
2038       for (i=0; i<amat->nz; i++) {
2039         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2040       }
2041       v = bmat->a;
2042       for (i=0; i<bmat->nz; i++) {
2043         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2044       }
2045       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2046       *norm = PetscSqrtReal(*norm);
2047     } else if (type == NORM_1) { /* max column norm */
2048       PetscReal *tmp,*tmp2;
2049       PetscInt  *jj,*garray = aij->garray;
2050       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2051       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2052       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2053       *norm = 0.0;
2054       v = amat->a; jj = amat->j;
2055       for (j=0; j<amat->nz; j++) {
2056         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2057       }
2058       v = bmat->a; jj = bmat->j;
2059       for (j=0; j<bmat->nz; j++) {
2060         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2061       }
2062       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2063       for (j=0; j<mat->cmap->N; j++) {
2064         if (tmp2[j] > *norm) *norm = tmp2[j];
2065       }
2066       ierr = PetscFree(tmp);CHKERRQ(ierr);
2067       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2068     } else if (type == NORM_INFINITY) { /* max row norm */
2069       PetscReal ntemp = 0.0;
2070       for (j=0; j<aij->A->rmap->n; j++) {
2071         v = amat->a + amat->i[j];
2072         sum = 0.0;
2073         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2074           sum += PetscAbsScalar(*v); v++;
2075         }
2076         v = bmat->a + bmat->i[j];
2077         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2078           sum += PetscAbsScalar(*v); v++;
2079         }
2080         if (sum > ntemp) ntemp = sum;
2081       }
2082       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2083     } else {
2084       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2085     }
2086   }
2087   PetscFunctionReturn(0);
2088 }
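/*
   The norms above are computed as follows:
      NORM_FROBENIUS: square root of the globally summed |a_ij|^2,
      NORM_1:         maximum over columns of the globally summed column sums of |a_ij|,
      NORM_INFINITY:  maximum over the (locally owned) rows of the row sums of |a_ij|.
   NORM_2 is not available for this format.  A minimal usage sketch (illustrative):

      PetscReal nrm;
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/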
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatTranspose_MPIAIJ"
2092 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2093 {
2094   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2095   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2096   PetscErrorCode ierr;
2097   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2098   PetscInt       cstart=A->cmap->rstart,ncol;
2099   Mat            B;
2100   MatScalar      *array;
2101 
2102   PetscFunctionBegin;
2103   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2104 
2105   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2106   ai = Aloc->i; aj = Aloc->j;
2107   bi = Bloc->i; bj = Bloc->j;
2108   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2109     PetscInt *d_nnz,*g_nnz,*o_nnz;
2110     PetscSFNode *oloc;
2111     PETSC_UNUSED PetscSF sf;
2112 
2113     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2114     /* compute d_nnz for preallocation */
2115     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2116     for (i=0; i<ai[ma]; i++) {
2117       d_nnz[aj[i]] ++;
2118       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2119     }
2120     /* compute local off-diagonal contributions */
2121     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2122     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2123     /* map those to global */
2124     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2125     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2126     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2127     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2128     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2129     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2130     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2131 
2132     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2133     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2134     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2135     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2136     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2137     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2138   } else {
2139     B = *matout;
2140     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2141     for (i=0; i<ai[ma]; i++){
2142       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2143     }
2144   }
2145 
2146   /* copy over the A part */
2147   array = Aloc->a;
2148   row = A->rmap->rstart;
2149   for (i=0; i<ma; i++) {
2150     ncol = ai[i+1]-ai[i];
2151     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++; array += ncol; aj += ncol;
2153   }
2154   aj = Aloc->j;
2155   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2156 
2157   /* copy over the B part */
2158   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2159   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2160   array = Bloc->a;
2161   row = A->rmap->rstart;
2162   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2163   cols_tmp = cols;
2164   for (i=0; i<mb; i++) {
2165     ncol = bi[i+1]-bi[i];
2166     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2167     row++; array += ncol; cols_tmp += ncol;
2168   }
2169   ierr = PetscFree(cols);CHKERRQ(ierr);
2170 
2171   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2172   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2173   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2174     *matout = B;
2175   } else {
2176     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2177   }
2178   PetscFunctionReturn(0);
2179 }
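/*
   The transpose above preallocates the result by counting, for every local column,
   the entries of the diagonal block (d_nnz) and by reducing the counts for the ghost
   columns of B onto their owning processes with a PetscSF (o_nnz); each original row
   is then inserted as a column of the result with MatSetValues().  A minimal usage
   sketch (illustrative):

      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/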
2180 
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2183 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2184 {
2185   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2186   Mat            a = aij->A,b = aij->B;
2187   PetscErrorCode ierr;
2188   PetscInt       s1,s2,s3;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2192   if (rr) {
2193     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2194     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2195     /* Overlap communication with computation. */
2196     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2197   }
2198   if (ll) {
2199     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2200     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2201     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2202   }
2203   /* scale  the diagonal block */
2204   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2205 
2206   if (rr) {
2207     /* Do a scatter end and then right scale the off-diagonal block */
2208     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2209     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2210   }
2211 
2212   PetscFunctionReturn(0);
2213 }
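/*
   MatDiagonalScale above forms diag(ll) * mat * diag(rr): the scatter of rr into the
   ghost vector lvec is started first, the left scaling of B and the full scaling of
   the diagonal block A proceed during the communication, and the right scaling of B
   is applied once the scatter completes.  A minimal usage sketch (the vectors l and r
   are illustrative):

      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/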
2214 
2215 #undef __FUNCT__
2216 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2217 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2218 {
2219   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2220   PetscErrorCode ierr;
2221 
2222   PetscFunctionBegin;
2223   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatEqual_MPIAIJ"
2229 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2230 {
2231   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2232   Mat            a,b,c,d;
2233   PetscBool      flg;
2234   PetscErrorCode ierr;
2235 
2236   PetscFunctionBegin;
2237   a = matA->A; b = matA->B;
2238   c = matB->A; d = matB->B;
2239 
2240   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2241   if (flg) {
2242     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2243   }
2244   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 #undef __FUNCT__
2249 #define __FUNCT__ "MatCopy_MPIAIJ"
2250 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2251 {
2252   PetscErrorCode ierr;
2253   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2254   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2255 
2256   PetscFunctionBegin;
2257   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2258   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2259     /* because of the column compression in the off-processor part of the matrix a->B,
2260        the number of columns in a->B and b->B may be different, hence we cannot call
2261        the MatCopy() directly on the two parts. If need be, we can provide a more
2262        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2263        then copying the submatrices */
2264     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2265   } else {
2266     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2267     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2268   }
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatSetUp_MPIAIJ"
2274 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2275 {
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 #undef __FUNCT__
2284 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2285 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2286 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2287 {
2288   PetscInt          i,m=Y->rmap->N;
2289   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2290   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2291   const PetscInt    *xi = x->i,*yi = y->i;
2292 
2293   PetscFunctionBegin;
2294   /* Set the number of nonzeros in the new matrix */
2295   for (i=0; i<m; i++) {
2296     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2297     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2298     nnz[i] = 0;
2299     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2300       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2301       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2302       nnz[i]++;
2303     }
2304     for (; k<nzy; k++) nnz[i]++;
2305   }
2306   PetscFunctionReturn(0);
2307 }
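/*
   The preallocation count above is a merge of two sorted index lists: for each row it
   walks the columns of X and Y (translated to global numbering through xltog/yltog),
   counts every index that appears in either matrix, and counts shared indices only
   once.  The result is the row-by-row nonzero count of Y + a*X for an arbitrary
   nonzero pattern.
*/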
2308 
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatAXPY_MPIAIJ"
2311 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2312 {
2313   PetscErrorCode ierr;
2314   PetscInt       i;
2315   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2316   PetscBLASInt   bnz,one=1;
2317   Mat_SeqAIJ     *x,*y;
2318 
2319   PetscFunctionBegin;
2320   if (str == SAME_NONZERO_PATTERN) {
2321     PetscScalar alpha = a;
2322     x = (Mat_SeqAIJ *)xx->A->data;
2323     y = (Mat_SeqAIJ *)yy->A->data;
2324     bnz = PetscBLASIntCast(x->nz);
2325     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2326     x = (Mat_SeqAIJ *)xx->B->data;
2327     y = (Mat_SeqAIJ *)yy->B->data;
2328     bnz = PetscBLASIntCast(x->nz);
2329     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2330   } else if (str == SUBSET_NONZERO_PATTERN) {
2331     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2332 
2333     x = (Mat_SeqAIJ *)xx->B->data;
2334     y = (Mat_SeqAIJ *)yy->B->data;
2335     if (y->xtoy && y->XtoY != xx->B) {
2336       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2337       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2338     }
2339     if (!y->xtoy) { /* get xtoy */
2340       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2341       y->XtoY = xx->B;
2342       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2343     }
2344     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2345   } else {
2346     Mat B;
2347     PetscInt *nnz_d,*nnz_o;
2348     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2349     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2350     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2351     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2352     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2353     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2354     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2355     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2356     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2357     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2358     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2359     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2360     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2361     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2362   }
2363   PetscFunctionReturn(0);
2364 }
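/*
   The three branches above correspond to the MatStructure argument:
   SAME_NONZERO_PATTERN uses a direct BLAS axpy on the stored values of the A and B
   blocks, SUBSET_NONZERO_PATTERN builds (and caches) an x-to-y index map for the
   off-diagonal block, and the general case preallocates a fresh matrix with the
   MatAXPYGetPreallocation_*() helpers and replaces Y in place.  A minimal usage
   sketch (illustrative):

      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/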
2365 
2366 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatConjugate_MPIAIJ"
2370 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2371 {
2372 #if defined(PETSC_USE_COMPLEX)
2373   PetscErrorCode ierr;
2374   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2375 
2376   PetscFunctionBegin;
2377   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2378   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2379 #else
2380   PetscFunctionBegin;
2381 #endif
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatRealPart_MPIAIJ"
2387 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2388 {
2389   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2390   PetscErrorCode ierr;
2391 
2392   PetscFunctionBegin;
2393   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2394   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2400 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2401 {
2402   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2403   PetscErrorCode ierr;
2404 
2405   PetscFunctionBegin;
2406   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2407   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 #ifdef PETSC_HAVE_PBGL
2412 
2413 #include <boost/parallel/mpi/bsp_process_group.hpp>
2414 #include <boost/graph/distributed/ilu_default_graph.hpp>
2415 #include <boost/graph/distributed/ilu_0_block.hpp>
2416 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2417 #include <boost/graph/distributed/petsc/interface.hpp>
2418 #include <boost/multi_array.hpp>
2419 #include <boost/parallel/distributed_property_map.hpp>
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2423 /*
2424   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2425 */
2426 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2427 {
2428   namespace petsc = boost::distributed::petsc;
2429 
2430   namespace graph_dist = boost::graph::distributed;
2431   using boost::graph::distributed::ilu_default::process_group_type;
2432   using boost::graph::ilu_permuted;
2433 
2434   PetscBool       row_identity, col_identity;
2435   PetscContainer  c;
2436   PetscInt        m, n, M, N;
2437   PetscErrorCode  ierr;
2438 
2439   PetscFunctionBegin;
2440   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2441   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2442   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2443   if (!row_identity || !col_identity) {
2444     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2445   }
2446 
2447   process_group_type pg;
2448   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2449   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2450   lgraph_type&   level_graph = *lgraph_p;
2451   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2452 
2453   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2454   ilu_permuted(level_graph);
2455 
2456   /* put together the new matrix */
2457   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2458   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2459   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2460   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2461   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2462   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2463   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2464   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2465 
2466   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2467   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2468   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2469   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2475 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2476 {
2477   PetscFunctionBegin;
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatSolve_MPIAIJ"
2483 /*
2484   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2485 */
2486 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2487 {
2488   namespace graph_dist = boost::graph::distributed;
2489 
2490   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2491   lgraph_type*   lgraph_p;
2492   PetscContainer c;
2493   PetscErrorCode ierr;
2494 
2495   PetscFunctionBegin;
2496   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2497   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2498   ierr = VecCopy(b, x);CHKERRQ(ierr);
2499 
2500   PetscScalar* array_x;
2501   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2502   PetscInt sx;
2503   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2504 
2505   PetscScalar* array_b;
2506   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2507   PetscInt sb;
2508   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2509 
2510   lgraph_type&   level_graph = *lgraph_p;
2511   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2512 
2513   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2514   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2515                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2516 
2517   typedef boost::iterator_property_map<array_ref_type::iterator,
2518                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2519   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2520                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2521 
2522   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2523 
2524   PetscFunctionReturn(0);
2525 }
2526 #endif
2527 
2528 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2529   PetscInt       nzlocal,nsends,nrecvs;
2530   PetscMPIInt    *send_rank,*recv_rank;
2531   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2532   PetscScalar    *sbuf_a,**rbuf_a;
2533   PetscErrorCode (*Destroy)(Mat);
2534 } Mat_Redundant;
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2538 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2539 {
2540   PetscErrorCode       ierr;
2541   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2542   PetscInt             i;
2543 
2544   PetscFunctionBegin;
2545   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2546   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2547   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2548   for (i=0; i<redund->nrecvs; i++){
2549     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2550     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2551   }
2552   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2553   ierr = PetscFree(redund);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatDestroy_MatRedundant"
2559 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2560 {
2561   PetscErrorCode  ierr;
2562   PetscContainer  container;
2563   Mat_Redundant   *redund=PETSC_NULL;
2564 
2565   PetscFunctionBegin;
2566   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2567   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2568   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2569   A->ops->destroy = redund->Destroy;
2570   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2571   if (A->ops->destroy) {
2572     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2573   }
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 #undef __FUNCT__
2578 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2579 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2580 {
2581   PetscMPIInt    rank,size;
2582   MPI_Comm       comm=((PetscObject)mat)->comm;
2583   PetscErrorCode ierr;
2584   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2585   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2586   PetscInt       *rowrange=mat->rmap->range;
2587   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2588   Mat            A=aij->A,B=aij->B,C=*matredundant;
2589   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2590   PetscScalar    *sbuf_a;
2591   PetscInt       nzlocal=a->nz+b->nz;
2592   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2593   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2594   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2595   MatScalar      *aworkA,*aworkB;
2596   PetscScalar    *vals;
2597   PetscMPIInt    tag1,tag2,tag3,imdex;
2598   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2599                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2600   MPI_Status     recv_status,*send_status;
2601   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2602   PetscInt       **rbuf_j=PETSC_NULL;
2603   PetscScalar    **rbuf_a=PETSC_NULL;
2604   Mat_Redundant  *redund=PETSC_NULL;
2605   PetscContainer container;
2606 
2607   PetscFunctionBegin;
2608   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2609   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2610 
2611   if (reuse == MAT_REUSE_MATRIX) {
2612     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2613     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2614     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2615     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2616     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2617     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2618     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2619     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2620 
2621     nsends    = redund->nsends;
2622     nrecvs    = redund->nrecvs;
2623     send_rank = redund->send_rank;
2624     recv_rank = redund->recv_rank;
2625     sbuf_nz   = redund->sbuf_nz;
2626     rbuf_nz   = redund->rbuf_nz;
2627     sbuf_j    = redund->sbuf_j;
2628     sbuf_a    = redund->sbuf_a;
2629     rbuf_j    = redund->rbuf_j;
2630     rbuf_a    = redund->rbuf_a;
2631   }
2632 
2633   if (reuse == MAT_INITIAL_MATRIX){
2634     PetscMPIInt  subrank,subsize;
2635     PetscInt     nleftover,np_subcomm;
2636     /* get the destination processors' id send_rank, nsends and nrecvs */
2637     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2638     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2639     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2640     np_subcomm = size/nsubcomm;
2641     nleftover  = size - nsubcomm*np_subcomm;
2642     nsends = 0; nrecvs = 0;
2643     for (i=0; i<size; i++){ /* i=rank*/
2644       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2645         send_rank[nsends] = i; nsends++;
2646         recv_rank[nrecvs++] = i;
2647       }
2648     }
2649     if (rank >= size - nleftover){/* this proc is a leftover processor */
2650       i = size-nleftover-1;
2651       j = 0;
2652       while (j < nsubcomm - nleftover){
2653         send_rank[nsends++] = i;
2654         i--; j++;
2655       }
2656     }
2657 
2658     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2659       for (i=0; i<nleftover; i++){
2660         recv_rank[nrecvs++] = size-nleftover+i;
2661       }
2662     }
2663 
2664     /* allocate sbuf_j, sbuf_a */
2665     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2666     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2667     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2668   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2669 
2670   /* copy mat's local entries into the buffers */
2671   if (reuse == MAT_INITIAL_MATRIX){
2672     rownz_max = 0;
2673     rptr = sbuf_j;
2674     cols = sbuf_j + rend-rstart + 1;
2675     vals = sbuf_a;
2676     rptr[0] = 0;
2677     for (i=0; i<rend-rstart; i++){
2678       row = i + rstart;
2679       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2680       ncols  = nzA + nzB;
2681       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2682       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2683       /* load the column indices for this row into cols */
2684       lwrite = 0;
2685       for (l=0; l<nzB; l++) {
2686         if ((ctmp = bmap[cworkB[l]]) < cstart){
2687           vals[lwrite]   = aworkB[l];
2688           cols[lwrite++] = ctmp;
2689         }
2690       }
2691       for (l=0; l<nzA; l++){
2692         vals[lwrite]   = aworkA[l];
2693         cols[lwrite++] = cstart + cworkA[l];
2694       }
2695       for (l=0; l<nzB; l++) {
2696         if ((ctmp = bmap[cworkB[l]]) >= cend){
2697           vals[lwrite]   = aworkB[l];
2698           cols[lwrite++] = ctmp;
2699         }
2700       }
2701       vals += ncols;
2702       cols += ncols;
2703       rptr[i+1] = rptr[i] + ncols;
2704       if (rownz_max < ncols) rownz_max = ncols;
2705     }
2706     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2707   } else { /* only copy matrix values into sbuf_a */
2708     rptr = sbuf_j;
2709     vals = sbuf_a;
2710     rptr[0] = 0;
2711     for (i=0; i<rend-rstart; i++){
2712       row = i + rstart;
2713       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2714       ncols  = nzA + nzB;
2715       cworkB = b->j + b->i[i];
2716       aworkA = a->a + a->i[i];
2717       aworkB = b->a + b->i[i];
2718       lwrite = 0;
2719       for (l=0; l<nzB; l++) {
2720         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2721       }
2722       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2723       for (l=0; l<nzB; l++) {
2724         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2725       }
2726       vals += ncols;
2727       rptr[i+1] = rptr[i] + ncols;
2728     }
2729   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2730 
2731   /* send nzlocal to others, and recv other's nzlocal */
2732   /*--------------------------------------------------*/
2733   if (reuse == MAT_INITIAL_MATRIX){
2734     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2735     s_waits2 = s_waits3 + nsends;
2736     s_waits1 = s_waits2 + nsends;
2737     r_waits1 = s_waits1 + nsends;
2738     r_waits2 = r_waits1 + nrecvs;
2739     r_waits3 = r_waits2 + nrecvs;
2740   } else {
2741     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2742     r_waits3 = s_waits3 + nsends;
2743   }
2744 
2745   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2746   if (reuse == MAT_INITIAL_MATRIX){
2747     /* get new tags to keep the communication clean */
2748     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2749     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2750     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2751 
2752     /* post receives of other's nzlocal */
2753     for (i=0; i<nrecvs; i++){
2754       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2755     }
2756     /* send nzlocal to others */
2757     for (i=0; i<nsends; i++){
2758       sbuf_nz[i] = nzlocal;
2759       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2760     }
2761     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2762     count = nrecvs;
2763     while (count) {
2764       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2765       recv_rank[imdex] = recv_status.MPI_SOURCE;
2766       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2767       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2768 
2769       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2770       rbuf_nz[imdex] += i + 2;
2771       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2772       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2773       count--;
2774     }
2775     /* wait on sends of nzlocal */
2776     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2777     /* send mat->i,j to others, and recv from others */
2778     /*------------------------------------------------*/
2779     for (i=0; i<nsends; i++){
2780       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2781       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2782     }
2783     /* wait on receives of mat->i,j */
2784     /*------------------------------*/
2785     count = nrecvs;
2786     while (count) {
2787       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2788       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2789       count--;
2790     }
2791     /* wait on sends of mat->i,j */
2792     /*---------------------------*/
2793     if (nsends) {
2794       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2795     }
2796   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2797 
2798   /* post receives, send and receive mat->a */
2799   /*----------------------------------------*/
2800   for (imdex=0; imdex<nrecvs; imdex++) {
2801     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2802   }
2803   for (i=0; i<nsends; i++){
2804     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2805   }
2806   count = nrecvs;
2807   while (count) {
2808     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2809     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2810     count--;
2811   }
2812   if (nsends) {
2813     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2814   }
2815 
2816   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2817 
2818   /* create redundant matrix */
2819   /*-------------------------*/
2820   if (reuse == MAT_INITIAL_MATRIX){
2821     /* compute rownz_max for preallocation */
2822     for (imdex=0; imdex<nrecvs; imdex++){
2823       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2824       rptr = rbuf_j[imdex];
2825       for (i=0; i<j; i++){
2826         ncols = rptr[i+1] - rptr[i];
2827         if (rownz_max < ncols) rownz_max = ncols;
2828       }
2829     }
2830 
2831     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2832     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2833     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2834     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2835     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2836     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2837   } else {
2838     C = *matredundant;
2839   }
2840 
2841   /* insert local matrix entries */
2842   rptr = sbuf_j;
2843   cols = sbuf_j + rend-rstart + 1;
2844   vals = sbuf_a;
2845   for (i=0; i<rend-rstart; i++){
2846     row   = i + rstart;
2847     ncols = rptr[i+1] - rptr[i];
2848     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2849     vals += ncols;
2850     cols += ncols;
2851   }
2852   /* insert received matrix entries */
2853   for (imdex=0; imdex<nrecvs; imdex++){
2854     rstart = rowrange[recv_rank[imdex]];
2855     rend   = rowrange[recv_rank[imdex]+1];
2856     rptr = rbuf_j[imdex];
2857     cols = rbuf_j[imdex] + rend-rstart + 1;
2858     vals = rbuf_a[imdex];
2859     for (i=0; i<rend-rstart; i++){
2860       row   = i + rstart;
2861       ncols = rptr[i+1] - rptr[i];
2862       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2863       vals += ncols;
2864       cols += ncols;
2865     }
2866   }
2867   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2868   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2869   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2870   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2871   if (reuse == MAT_INITIAL_MATRIX) {
2872     PetscContainer container;
2873     *matredundant = C;
2874     /* create a supporting struct and attach it to C for reuse */
2875     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2876     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2877     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2878     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2879     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2880     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2881 
2882     redund->nzlocal = nzlocal;
2883     redund->nsends  = nsends;
2884     redund->nrecvs  = nrecvs;
2885     redund->send_rank = send_rank;
2886     redund->recv_rank = recv_rank;
2887     redund->sbuf_nz = sbuf_nz;
2888     redund->rbuf_nz = rbuf_nz;
2889     redund->sbuf_j  = sbuf_j;
2890     redund->sbuf_a  = sbuf_a;
2891     redund->rbuf_j  = rbuf_j;
2892     redund->rbuf_a  = rbuf_a;
2893 
2894     redund->Destroy = C->ops->destroy;
2895     C->ops->destroy = MatDestroy_MatRedundant;
2896   }
2897   PetscFunctionReturn(0);
2898 }
2899 
2900 #undef __FUNCT__
2901 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2902 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2903 {
2904   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2905   PetscErrorCode ierr;
2906   PetscInt       i,*idxb = 0;
2907   PetscScalar    *va,*vb;
2908   Vec            vtmp;
2909 
2910   PetscFunctionBegin;
2911   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2912   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2913   if (idx) {
2914     for (i=0; i<A->rmap->n; i++) {
2915       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2916     }
2917   }
2918 
2919   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2920   if (idx) {
2921     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2922   }
2923   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2924   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2925 
2926   for (i=0; i<A->rmap->n; i++){
2927     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2928       va[i] = vb[i];
2929       if (idx) idx[i] = a->garray[idxb[i]];
2930     }
2931   }
2932 
2933   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2934   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2935   ierr = PetscFree(idxb);CHKERRQ(ierr);
2936   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2937   PetscFunctionReturn(0);
2938 }
2939 
2940 #undef __FUNCT__
2941 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2942 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2943 {
2944   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2945   PetscErrorCode ierr;
2946   PetscInt       i,*idxb = 0;
2947   PetscScalar    *va,*vb;
2948   Vec            vtmp;
2949 
2950   PetscFunctionBegin;
2951   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2952   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2953   if (idx) {
2954     for (i=0; i<A->rmap->n; i++) {
2955       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2956     }
2957   }
2958 
2959   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2960   if (idx) {
2961     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2962   }
2963   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2964   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2965 
2966   for (i=0; i<A->rmap->n; i++){
2967     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2968       va[i] = vb[i];
2969       if (idx) idx[i] = a->garray[idxb[i]];
2970     }
2971   }
2972 
2973   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2974   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2975   ierr = PetscFree(idxb);CHKERRQ(ierr);
2976   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 #undef __FUNCT__
2981 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2982 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2983 {
2984   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2985   PetscInt       n      = A->rmap->n;
2986   PetscInt       cstart = A->cmap->rstart;
2987   PetscInt      *cmap   = mat->garray;
2988   PetscInt      *diagIdx, *offdiagIdx;
2989   Vec            diagV, offdiagV;
2990   PetscScalar   *a, *diagA, *offdiagA;
2991   PetscInt       r;
2992   PetscErrorCode ierr;
2993 
2994   PetscFunctionBegin;
2995   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2996   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2997   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2998   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2999   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3000   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3001   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3002   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3003   for (r = 0; r < n; ++r) {
3004     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3005       a[r]   = diagA[r];
3006       idx[r] = cstart + diagIdx[r];
3007     } else {
3008       a[r]   = offdiagA[r];
3009       idx[r] = cmap[offdiagIdx[r]];
3010     }
3011   }
3012   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3013   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3014   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3015   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3016   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3017   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 #undef __FUNCT__
3022 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3023 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3024 {
3025   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3026   PetscInt       n      = A->rmap->n;
3027   PetscInt       cstart = A->cmap->rstart;
3028   PetscInt      *cmap   = mat->garray;
3029   PetscInt      *diagIdx, *offdiagIdx;
3030   Vec            diagV, offdiagV;
3031   PetscScalar   *a, *diagA, *offdiagA;
3032   PetscInt       r;
3033   PetscErrorCode ierr;
3034 
3035   PetscFunctionBegin;
3036   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3037   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3038   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3039   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3040   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3041   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3042   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3043   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3044   for (r = 0; r < n; ++r) {
3045     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3046       a[r]   = diagA[r];
3047       idx[r] = cstart + diagIdx[r];
3048     } else {
3049       a[r]   = offdiagA[r];
3050       idx[r] = cmap[offdiagIdx[r]];
3051     }
3052   }
3053   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3054   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3055   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3056   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3057   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3058   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 #undef __FUNCT__
3063 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3064 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3065 {
3066   PetscErrorCode ierr;
3067   Mat            *dummy;
3068 
3069   PetscFunctionBegin;
3070   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3071   *newmat = *dummy;
3072   ierr = PetscFree(dummy);CHKERRQ(ierr);
3073   PetscFunctionReturn(0);
3074 }
3075 
3076 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3077 
3078 #undef __FUNCT__
3079 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3080 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3081 {
3082   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3083   PetscErrorCode ierr;
3084 
3085   PetscFunctionBegin;
3086   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3087   PetscFunctionReturn(0);
3088 }
3089 
3090 #undef __FUNCT__
3091 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3092 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3093 {
3094   PetscErrorCode ierr;
3095   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3096 
3097   PetscFunctionBegin;
3098   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3099   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3100   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3101   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 /* -------------------------------------------------------------------*/
3106 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3107        MatGetRow_MPIAIJ,
3108        MatRestoreRow_MPIAIJ,
3109        MatMult_MPIAIJ,
3110 /* 4*/ MatMultAdd_MPIAIJ,
3111        MatMultTranspose_MPIAIJ,
3112        MatMultTransposeAdd_MPIAIJ,
3113 #ifdef PETSC_HAVE_PBGL
3114        MatSolve_MPIAIJ,
3115 #else
3116        0,
3117 #endif
3118        0,
3119        0,
3120 /*10*/ 0,
3121        0,
3122        0,
3123        MatSOR_MPIAIJ,
3124        MatTranspose_MPIAIJ,
3125 /*15*/ MatGetInfo_MPIAIJ,
3126        MatEqual_MPIAIJ,
3127        MatGetDiagonal_MPIAIJ,
3128        MatDiagonalScale_MPIAIJ,
3129        MatNorm_MPIAIJ,
3130 /*20*/ MatAssemblyBegin_MPIAIJ,
3131        MatAssemblyEnd_MPIAIJ,
3132        MatSetOption_MPIAIJ,
3133        MatZeroEntries_MPIAIJ,
3134 /*24*/ MatZeroRows_MPIAIJ,
3135        0,
3136 #ifdef PETSC_HAVE_PBGL
3137        0,
3138 #else
3139        0,
3140 #endif
3141        0,
3142        0,
3143 /*29*/ MatSetUp_MPIAIJ,
3144 #ifdef PETSC_HAVE_PBGL
3145        0,
3146 #else
3147        0,
3148 #endif
3149        0,
3150        0,
3151        0,
3152 /*34*/ MatDuplicate_MPIAIJ,
3153        0,
3154        0,
3155        0,
3156        0,
3157 /*39*/ MatAXPY_MPIAIJ,
3158        MatGetSubMatrices_MPIAIJ,
3159        MatIncreaseOverlap_MPIAIJ,
3160        MatGetValues_MPIAIJ,
3161        MatCopy_MPIAIJ,
3162 /*44*/ MatGetRowMax_MPIAIJ,
3163        MatScale_MPIAIJ,
3164        0,
3165        0,
3166        MatZeroRowsColumns_MPIAIJ,
3167 /*49*/ MatSetRandom_MPIAIJ,
3168        0,
3169        0,
3170        0,
3171        0,
3172 /*54*/ MatFDColoringCreate_MPIAIJ,
3173        0,
3174        MatSetUnfactored_MPIAIJ,
3175        MatPermute_MPIAIJ,
3176        0,
3177 /*59*/ MatGetSubMatrix_MPIAIJ,
3178        MatDestroy_MPIAIJ,
3179        MatView_MPIAIJ,
3180        0,
3181        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3182 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3183        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3184        0,
3185        0,
3186        0,
3187 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3188        MatGetRowMinAbs_MPIAIJ,
3189        0,
3190        MatSetColoring_MPIAIJ,
3191        0,
3192        MatSetValuesAdifor_MPIAIJ,
3193 /*75*/ MatFDColoringApply_AIJ,
3194        0,
3195        0,
3196        0,
3197        MatFindZeroDiagonals_MPIAIJ,
3198 /*80*/ 0,
3199        0,
3200        0,
3201 /*83*/ MatLoad_MPIAIJ,
3202        0,
3203        0,
3204        0,
3205        0,
3206        0,
3207 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3208        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3209        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3210        MatPtAP_MPIAIJ_MPIAIJ,
3211        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3212 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3213        0,
3214        0,
3215        0,
3216        0,
3217 /*99*/ 0,
3218        0,
3219        0,
3220        MatConjugate_MPIAIJ,
3221        0,
3222 /*104*/MatSetValuesRow_MPIAIJ,
3223        MatRealPart_MPIAIJ,
3224        MatImaginaryPart_MPIAIJ,
3225        0,
3226        0,
3227 /*109*/0,
3228        MatGetRedundantMatrix_MPIAIJ,
3229        MatGetRowMin_MPIAIJ,
3230        0,
3231        0,
3232 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3233        0,
3234        0,
3235        0,
3236        0,
3237 /*119*/0,
3238        0,
3239        0,
3240        0,
3241        MatGetMultiProcBlock_MPIAIJ,
3242 /*124*/MatFindNonzeroRows_MPIAIJ,
3243        MatGetColumnNorms_MPIAIJ,
3244        MatInvertBlockDiagonal_MPIAIJ,
3245        0,
3246        MatGetSubMatricesParallel_MPIAIJ,
3247 /*129*/0,
3248        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3249        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3250        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3251        0,
3252 /*134*/0,
3253        0,
3254        0,
3255        0,
3256        0
3257 };
3258 
3259 /* ----------------------------------------------------------------------------------------*/
3260 
3261 EXTERN_C_BEGIN
3262 #undef __FUNCT__
3263 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3264 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3265 {
3266   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3267   PetscErrorCode ierr;
3268 
3269   PetscFunctionBegin;
3270   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3271   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3272   PetscFunctionReturn(0);
3273 }
3274 EXTERN_C_END
3275 
3276 EXTERN_C_BEGIN
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3279 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3280 {
3281   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3282   PetscErrorCode ierr;
3283 
3284   PetscFunctionBegin;
3285   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3286   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3287   PetscFunctionReturn(0);
3288 }
3289 EXTERN_C_END
3290 
3291 EXTERN_C_BEGIN
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3294 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3295 {
3296   Mat_MPIAIJ     *b;
3297   PetscErrorCode ierr;
3298   PetscInt       i;
3299   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3300 
3301   PetscFunctionBegin;
3302   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3303   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3304   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3305   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3306   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3307   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3308 
3309   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3310   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3311   if (d_nnz) {
3312     for (i=0; i<B->rmap->n; i++) {
3313       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3314     }
3315   }
3316   if (o_nnz) {
3317     for (i=0; i<B->rmap->n; i++) {
3318       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3319     }
3320   }
3321   b = (Mat_MPIAIJ*)B->data;
3322 
3323   if (!B->preallocated) {
3324     /* Explicitly create 2 MATSEQAIJ matrices. */
3325     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3326     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3327     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3328     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3329     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3330     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3331     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3332     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3333     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3334     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3335   }
3336 
3337   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3338   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3339   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3340   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3341   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3342   B->preallocated = PETSC_TRUE;
3343   PetscFunctionReturn(0);
3344 }
3345 EXTERN_C_END
3346 
3347 #undef __FUNCT__
3348 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3349 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3350 {
3351   Mat            mat;
3352   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3353   PetscErrorCode ierr;
3354 
3355   PetscFunctionBegin;
3356   *newmat       = 0;
3357   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3358   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3359   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3360   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3361   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3362   a    = (Mat_MPIAIJ*)mat->data;
3363 
3364   mat->factortype    = matin->factortype;
3365   mat->rmap->bs      = matin->rmap->bs;
3366   mat->cmap->bs      = matin->cmap->bs;
3367   mat->assembled    = PETSC_TRUE;
3368   mat->insertmode   = NOT_SET_VALUES;
3369   mat->preallocated = PETSC_TRUE;
3370 
3371   a->size           = oldmat->size;
3372   a->rank           = oldmat->rank;
3373   a->donotstash     = oldmat->donotstash;
3374   a->roworiented    = oldmat->roworiented;
3375   a->rowindices     = 0;
3376   a->rowvalues      = 0;
3377   a->getrowactive   = PETSC_FALSE;
3378 
3379   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3380   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3381 
3382   if (oldmat->colmap) {
3383 #if defined (PETSC_USE_CTABLE)
3384     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3385 #else
3386     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3387     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3388     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3389 #endif
3390   } else a->colmap = 0;
3391   if (oldmat->garray) {
3392     PetscInt len;
3393     len  = oldmat->B->cmap->n;
3394     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3395     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3396     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3397   } else a->garray = 0;
3398 
3399   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3400   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3401   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3402   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3403   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3404   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3405   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3406   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3407   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3408   *newmat = mat;
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 
3413 
3414 #undef __FUNCT__
3415 #define __FUNCT__ "MatLoad_MPIAIJ"
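/*
   A sketch of the binary file layout this routine expects, as inferred from the reads
   below (rank 0 does all of the file I/O and ships each piece to its owner with
   MPIULong_Send(); the other ranks only receive their own rows):

     header[4]        MAT_FILE_CLASSID, M (global rows), N (global cols), and a fourth
                      entry that is read but not used directly here
     row lengths      M PetscInt values, the number of nonzeros in each row
     column indices   one PetscInt per nonzero, stored row by row
     values           one PetscScalar per nonzero, stored row by row
*/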
3416 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3417 {
3418   PetscScalar    *vals,*svals;
3419   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3420   PetscErrorCode ierr;
3421   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3422   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3423   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3424   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3425   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3426   int            fd;
3427   PetscInt       bs = 1;
3428 
3429   PetscFunctionBegin;
3430   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3431   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3432   if (!rank) {
3433     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3434     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3435     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3436   }
3437 
3438   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3439   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3440   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3441 
3442   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3443 
3444   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3445   M = header[1]; N = header[2];
3446   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3447   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3448   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3449 
3450   /* If global sizes are set, check if they are consistent with that given in the file */
3451   if (sizesset) {
3452     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3453   }
3454   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3455   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3456 
3457   /* determine ownership of all (block) rows */
3458   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3459   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3460   else m = newMat->rmap->n; /* Set by user */
3461 
3462   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3463   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3464 
3465   /* First process needs enough room for process with most rows */
3466   if (!rank) {
3467     mmax = rowners[1];
3468     for (i=2; i<=size; i++) {
3469       mmax = PetscMax(mmax, rowners[i]);
3470     }
3471   } else mmax = m;
3472 
3473   rowners[0] = 0;
3474   for (i=2; i<=size; i++) {
3475     rowners[i] += rowners[i-1];
3476   }
3477   rstart = rowners[rank];
3478   rend   = rowners[rank+1];
3479 
3480   /* distribute row lengths to all processors */
3481   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3482   if (!rank) {
3483     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3484     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3485     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3486     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3487     for (j=0; j<m; j++) {
3488       procsnz[0] += ourlens[j];
3489     }
3490     for (i=1; i<size; i++) {
3491       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3492       /* calculate the number of nonzeros on each processor */
3493       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3494         procsnz[i] += rowlengths[j];
3495       }
3496       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3497     }
3498     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3499   } else {
3500     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3501   }
3502 
3503   if (!rank) {
3504     /* determine max buffer needed and allocate it */
3505     maxnz = 0;
3506     for (i=0; i<size; i++) {
3507       maxnz = PetscMax(maxnz,procsnz[i]);
3508     }
3509     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3510 
3511     /* read in my part of the matrix column indices  */
3512     nz   = procsnz[0];
3513     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3514     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3515 
3516     /* read in everyone else's rows and ship them off */
3517     for (i=1; i<size; i++) {
3518       nz     = procsnz[i];
3519       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3520       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3521     }
3522     ierr = PetscFree(cols);CHKERRQ(ierr);
3523   } else {
3524     /* determine buffer space needed for message */
3525     nz = 0;
3526     for (i=0; i<m; i++) {
3527       nz += ourlens[i];
3528     }
3529     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3530 
3531     /* receive message of column indices*/
3532     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3533   }
3534 
3535   /* determine column ownership if matrix is not square */
3536   if (N != M) {
3537     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3538     else n = newMat->cmap->n;
3539     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3540     cstart = cend - n;
3541   } else {
3542     cstart = rstart;
3543     cend   = rend;
3544     n      = cend - cstart;
3545   }
3546 
3547   /* loop over local rows, determining number of off diagonal entries */
3548   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3549   jj = 0;
3550   for (i=0; i<m; i++) {
3551     for (j=0; j<ourlens[i]; j++) {
3552       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3553       jj++;
3554     }
3555   }
3556 
3557   for (i=0; i<m; i++) {
3558     ourlens[i] -= offlens[i];
3559   }
3560   if (!sizesset) {
3561     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3562   }
3563 
3564   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3565 
3566   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3567 
3568   for (i=0; i<m; i++) {
3569     ourlens[i] += offlens[i];
3570   }
3571 
3572   if (!rank) {
3573     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3574 
3575     /* read in my part of the matrix numerical values  */
3576     nz   = procsnz[0];
3577     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3578 
3579     /* insert into matrix */
3580     jj      = rstart;
3581     smycols = mycols;
3582     svals   = vals;
3583     for (i=0; i<m; i++) {
3584       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3585       smycols += ourlens[i];
3586       svals   += ourlens[i];
3587       jj++;
3588     }
3589 
3590     /* read in other processors and ship out */
3591     for (i=1; i<size; i++) {
3592       nz     = procsnz[i];
3593       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3594       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3595     }
3596     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3597   } else {
3598     /* receive numeric values */
3599     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3600 
3601     /* receive message of values*/
3602     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3603 
3604     /* insert into matrix */
3605     jj      = rstart;
3606     smycols = mycols;
3607     svals   = vals;
3608     for (i=0; i<m; i++) {
3609       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3610       smycols += ourlens[i];
3611       svals   += ourlens[i];
3612       jj++;
3613     }
3614   }
3615   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3616   ierr = PetscFree(vals);CHKERRQ(ierr);
3617   ierr = PetscFree(mycols);CHKERRQ(ierr);
3618   ierr = PetscFree(rowners);CHKERRQ(ierr);
3619   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3620   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3621 
3622   PetscFunctionReturn(0);
3623 }
3624 
3625 #undef __FUNCT__
3626 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3627 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3628 {
3629   PetscErrorCode ierr;
3630   IS             iscol_local;
3631   PetscInt       csize;
3632 
3633   PetscFunctionBegin;
3634   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3635   if (call == MAT_REUSE_MATRIX) {
3636     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3637     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3638   } else {
3639     PetscInt cbs;
3640     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3641     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3642     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3643   }
3644   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3645   if (call == MAT_INITIAL_MATRIX) {
3646     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3647     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3648   }
3649   PetscFunctionReturn(0);
3650 }
3651 
3652 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3653 #undef __FUNCT__
3654 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3655 /*
3656     Not great since it makes two copies of the submatrix: first a SeqAIJ
3657   copy in local, and then the final result obtained by concatenating the local matrices.
3658   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3659 
3660   Note: This requires a sequential iscol with all indices.
3661 */
3662 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3663 {
3664   PetscErrorCode ierr;
3665   PetscMPIInt    rank,size;
3666   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3667   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3668   PetscBool      allcolumns, colflag;
3669   Mat            M,Mreuse;
3670   MatScalar      *vwork,*aa;
3671   MPI_Comm       comm = ((PetscObject)mat)->comm;
3672   Mat_SeqAIJ     *aij;
3673 
3674 
3675   PetscFunctionBegin;
3676   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3677   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3678 
3679   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3680   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3681   if (colflag && ncol == mat->cmap->N){
3682     allcolumns = PETSC_TRUE;
3683   } else {
3684     allcolumns = PETSC_FALSE;
3685   }
3686   if (call ==  MAT_REUSE_MATRIX) {
3687     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3688     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3689     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3690   } else {
3691     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3692   }
3693 
3694   /*
3695       m - number of local rows
3696       n - number of columns (same on all processors)
3697       rstart - first row in new global matrix generated
3698   */
3699   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3700   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3701   if (call == MAT_INITIAL_MATRIX) {
3702     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3703     ii  = aij->i;
3704     jj  = aij->j;
3705 
3706     /*
3707         Determine the number of non-zeros in the diagonal and off-diagonal
3708         portions of the matrix in order to do correct preallocation
3709     */
3710 
3711     /* first get start and end of "diagonal" columns */
3712     if (csize == PETSC_DECIDE) {
3713       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3714       if (mglobal == n) { /* square matrix */
3715         nlocal = m;
3716       } else {
3717         nlocal = n/size + ((n % size) > rank);
3718       }
3719     } else {
3720       nlocal = csize;
3721     }
3722     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3723     rstart = rend - nlocal;
3724     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3725 
3726     /* next, compute all the lengths */
3727     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3728     olens = dlens + m;
3729     for (i=0; i<m; i++) {
3730       jend = ii[i+1] - ii[i];
3731       olen = 0;
3732       dlen = 0;
3733       for (j=0; j<jend; j++) {
3734         if (*jj < rstart || *jj >= rend) olen++;
3735         else dlen++;
3736         jj++;
3737       }
3738       olens[i] = olen;
3739       dlens[i] = dlen;
3740     }
3741     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3742     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3743     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3744     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3745     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3746     ierr = PetscFree(dlens);CHKERRQ(ierr);
3747   } else {
3748     PetscInt ml,nl;
3749 
3750     M = *newmat;
3751     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3752     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3753     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3754     /*
3755          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3756        rather than the slower MatSetValues().
3757     */
3758     M->was_assembled = PETSC_TRUE;
3759     M->assembled     = PETSC_FALSE;
3760   }
3761   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3762   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3763   ii  = aij->i;
3764   jj  = aij->j;
3765   aa  = aij->a;
3766   for (i=0; i<m; i++) {
3767     row   = rstart + i;
3768     nz    = ii[i+1] - ii[i];
3769     cwork = jj;     jj += nz;
3770     vwork = aa;     aa += nz;
3771     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3772   }
3773 
3774   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3775   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3776   *newmat = M;
3777 
3778   /* save submatrix used in processor for next request */
3779   if (call ==  MAT_INITIAL_MATRIX) {
3780     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3781     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3782   }
3783 
3784   PetscFunctionReturn(0);
3785 }
3786 
3787 EXTERN_C_BEGIN
3788 #undef __FUNCT__
3789 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3790 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3791 {
3792   PetscInt       m,cstart, cend,j,nnz,i,d;
3793   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3794   const PetscInt *JJ;
3795   PetscScalar    *values;
3796   PetscErrorCode ierr;
3797 
3798   PetscFunctionBegin;
3799   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3800 
3801   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3802   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3803   m      = B->rmap->n;
3804   cstart = B->cmap->rstart;
3805   cend   = B->cmap->rend;
3806   rstart = B->rmap->rstart;
3807 
3808   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3809 
3810 #if defined(PETSC_USE_DEBUG)
3811   for (i=0; i<m; i++) {
3812     nnz     = Ii[i+1]- Ii[i];
3813     JJ      = J + Ii[i];
3814     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3815     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3816     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3817   }
3818 #endif
3819 
3820   for (i=0; i<m; i++) {
3821     nnz     = Ii[i+1]- Ii[i];
3822     JJ      = J + Ii[i];
3823     nnz_max = PetscMax(nnz_max,nnz);
3824     d       = 0;
3825     for (j=0; j<nnz; j++) {
3826       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3827     }
3828     d_nnz[i] = d;
3829     o_nnz[i] = nnz - d;
3830   }
3831   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3832   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3833 
3834   if (v) values = (PetscScalar*)v;
3835   else {
3836     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3837     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3838   }
3839 
3840   for (i=0; i<m; i++) {
3841     ii   = i + rstart;
3842     nnz  = Ii[i+1]- Ii[i];
3843     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3844   }
3845   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3846   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3847 
3848   if (!v) {
3849     ierr = PetscFree(values);CHKERRQ(ierr);
3850   }
3851   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3852   PetscFunctionReturn(0);
3853 }
3854 EXTERN_C_END
3855 
3856 #undef __FUNCT__
3857 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3858 /*@
3859    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3860    (the default parallel PETSc format).
3861 
3862    Collective on MPI_Comm
3863 
3864    Input Parameters:
3865 +  B - the matrix
3866 .  i - the indices into j for the start of each local row (starts with zero)
3867 .  j - the column indices for each local row (starts with zero)
3868 -  v - optional values in the matrix
3869 
3870    Level: developer
3871 
3872    Notes:
3873        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3874      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3875      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3876 
3877        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3878 
3879        The format used for the sparse matrix input is equivalent to a
3880     row-major ordering, i.e. for the following matrix, the input data expected is
3881     as shown:
3882 
3883         1 0 0
3884         2 0 3     P0
3885        -------
3886         4 5 6     P1
3887 
3888      Process0 [P0]: rows_owned=[0,1]
3889         i =  {0,1,3}  [size = nrow+1  = 2+1]
3890         j =  {0,0,2}  [size = nz = 3]
3891         v =  {1,2,3}  [size = nz = 3]
3892 
3893      Process1 [P1]: rows_owned=[2]
3894         i =  {0,3}    [size = nrow+1  = 1+1]
3895         j =  {0,1,2}  [size = nz = 3]
3896         v =  {4,5,6}  [size = nz = 3]
3897 
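   Example usage (a minimal sketch, not an excerpt from a PETSc example; it sets up the
   process-0 portion of the 3x3 matrix above on a two-process run, and process 1 would
   pass its own local i, j, v arrays in the same way):

.vb
   Mat         B;
   PetscInt    i[] = {0,1,3};          /* row pointers for the two local rows on P0 */
   PetscInt    j[] = {0,0,2};          /* global column indices, row by row         */
   PetscScalar v[] = {1.0,2.0,3.0};    /* one value per entry of j                  */

   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,2,PETSC_DECIDE,3,3);  /* P0 owns rows 0 and 1 of the 3x3 matrix    */
   MatSetType(B,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(B,i,j,v); /* preallocates, inserts values, assembles */
.ve
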
3898 .keywords: matrix, aij, compressed row, sparse, parallel
3899 
3900 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3901           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3902 @*/
3903 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3904 {
3905   PetscErrorCode ierr;
3906 
3907   PetscFunctionBegin;
3908   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3909   PetscFunctionReturn(0);
3910 }
3911 
3912 #undef __FUNCT__
3913 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3914 /*@C
3915    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3916    (the default parallel PETSc format).  For good matrix assembly performance
3917    the user should preallocate the matrix storage by setting the parameters
3918    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3919    performance can be increased by more than a factor of 50.
3920 
3921    Collective on MPI_Comm
3922 
3923    Input Parameters:
3924 +  A - the matrix
3925 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3926            (same value is used for all local rows)
3927 .  d_nnz - array containing the number of nonzeros in the various rows of the
3928            DIAGONAL portion of the local submatrix (possibly different for each row)
3929            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3930            The size of this array is equal to the number of local rows, i.e 'm'.
3931            For matrices that will be factored, you must leave room for (and set)
3932            the diagonal entry even if it is zero.
3933 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3934            submatrix (same value is used for all local rows).
3935 -  o_nnz - array containing the number of nonzeros in the various rows of the
3936            OFF-DIAGONAL portion of the local submatrix (possibly different for
3937            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3938            structure. The size of this array is equal to the number
3939            of local rows, i.e 'm'.
3940 
3941    If the *_nnz parameter is given then the *_nz parameter is ignored
3942 
3943    The AIJ format (also called the Yale sparse matrix format or
3944    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3945    storage.  The stored row and column indices begin with zero.
3946    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3947 
3948    The parallel matrix is partitioned such that the first m0 rows belong to
3949    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3950    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3951 
3952    The DIAGONAL portion of the local submatrix of a processor can be defined
3953    as the submatrix obtained by extracting the part corresponding to
3954    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3955    first row that belongs to the processor, r2 is the last row belonging to
3956    this processor, and c1-c2 is the range of indices of the local part of a
3957    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3958    common case of a square matrix, the row and column ranges are the same and
3959    the DIAGONAL part is also square. The remaining portion of the local
3960    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3961 
3962    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3963 
3964    You can call MatGetInfo() to get information on how effective the preallocation was;
3965    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3966    You can also run with the option -info and look for messages containing the
3967    string malloc to see whether additional memory allocation was needed.
3968 
3969    Example usage:
3970 
3971    Consider the following 8x8 matrix with 34 non-zero values that is
3972    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3973    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3974    as follows:
3975 
3976 .vb
3977             1  2  0  |  0  3  0  |  0  4
3978     Proc0   0  5  6  |  7  0  0  |  8  0
3979             9  0 10  | 11  0  0  | 12  0
3980     -------------------------------------
3981            13  0 14  | 15 16 17  |  0  0
3982     Proc1   0 18  0  | 19 20 21  |  0  0
3983             0  0  0  | 22 23  0  | 24  0
3984     -------------------------------------
3985     Proc2  25 26 27  |  0  0 28  | 29  0
3986            30  0  0  | 31 32 33  |  0 34
3987 .ve
3988 
3989    This can be represented as a collection of submatrices as:
3990 
3991 .vb
3992       A B C
3993       D E F
3994       G H I
3995 .ve
3996 
3997    where the submatrices A,B,C are owned by proc0, D,E,F are
3998    owned by proc1, and G,H,I are owned by proc2.
3999 
4000    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4001    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4002    The 'M','N' parameters are 8,8, and have the same values on all procs.
4003 
4004    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4005    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4006    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4007    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4008    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4009    matrix, and [DF] as another SeqAIJ matrix.
4010 
4011    When d_nz, o_nz parameters are specified, d_nz storage elements are
4012    allocated for every row of the local diagonal submatrix, and o_nz
4013    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4014    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4015    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4016    In this case, the values of d_nz,o_nz are:
4017 .vb
4018      proc0 : d_nz = 2, o_nz = 2
4019      proc1 : d_nz = 3, o_nz = 2
4020      proc2 : d_nz = 1, o_nz = 4
4021 .ve
4022    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4023    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4024    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4025    34 values.
4026 
4027    When the d_nnz, o_nnz parameters are specified, the storage is specified
4028    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4029    In the above case the values for d_nnz,o_nnz are:
4030 .vb
4031      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4032      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4033      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4034 .ve
4035    Here the space allocated is the sum of all the above values, i.e., 34, and
4036    hence the preallocation is perfect.
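
   A minimal sketch of the recommended calling sequence (not taken from this file; error
   checking and the actual MatSetValues() loop are omitted, and the per-row counts shown
   are placeholders that an application would compute itself) might look like:

.vb
     Mat      A;
     PetscInt m = 3;                          /* locally owned rows (placeholder)     */
     PetscInt d_nnz[3] = {2,2,2};             /* nonzeros per row, DIAGONAL block     */
     PetscInt o_nnz[3] = {2,2,2};             /* nonzeros per row, OFF-DIAGONAL block */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,0,d_nnz);           /* used on one process       */
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);   /* used on several processes */
     /* ... MatSetValues() calls ... */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve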
4037 
4038    Level: intermediate
4039 
4040 .keywords: matrix, aij, compressed row, sparse, parallel
4041 
4042 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4043           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4044 @*/
4045 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4046 {
4047   PetscErrorCode ierr;
4048 
4049   PetscFunctionBegin;
4050   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4051   PetscValidType(B,1);
4052   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4053   PetscFunctionReturn(0);
4054 }
4055 
4056 #undef __FUNCT__
4057 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4058 /*@
4059      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4060          rows in standard CSR format.
4061 
4062    Collective on MPI_Comm
4063 
4064    Input Parameters:
4065 +  comm - MPI communicator
4066 .  m - number of local rows (Cannot be PETSC_DECIDE)
4067 .  n - This value should be the same as the local size used in creating the
4068        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4069        calculated if N is given). For square matrices n is almost always m.
4070 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4071 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4072 .   i - row indices
4073 .   j - column indices
4074 -   a - matrix values
4075 
4076    Output Parameter:
4077 .   mat - the matrix
4078 
4079    Level: intermediate
4080 
4081    Notes:
4082        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4083      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4084      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4085 
4086        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4087 
4088        The format used for the sparse matrix input is equivalent to a
4089     row-major ordering, i.e., for the following matrix, the input data expected is
4090     as shown:
4091 
4092         1 0 0
4093         2 0 3     P0
4094        -------
4095         4 5 6     P1
4096 
4097      Process0 [P0]: rows_owned=[0,1]
4098         i =  {0,1,3}  [size = nrow+1  = 2+1]
4099         j =  {0,0,2}  [size = local nz = 3]
4100         v =  {1,2,3}  [size = local nz = 3]
4101 
4102      Process1 [P1]: rows_owned=[2]
4103         i =  {0,3}    [size = nrow+1  = 1+1]
4104         j =  {0,1,2}  [size = local nz = 3]
4105         v =  {4,5,6}  [size = local nz = 3]
4106 
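   As a rough sketch of how process 0 in the layout above might call this routine
   (each rank passes its own local arrays, so P1 would pass m=1 with i={0,3},
   j={0,1,2}, v={4,5,6}; error checking is omitted):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};             /* row pointers for the 2 local rows of P0 */
     PetscInt    j[] = {0,0,2};             /* local column indices                    */
     PetscScalar v[] = {1.0,2.0,3.0};       /* local values                            */

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
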
4107 .keywords: matrix, aij, compressed row, sparse, parallel
4108 
4109 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4110           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4111 @*/
4112 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4113 {
4114   PetscErrorCode ierr;
4115 
4116   PetscFunctionBegin;
4117   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4118   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4119   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4120   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4121   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4122   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4123   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4124   PetscFunctionReturn(0);
4125 }
4126 
4127 #undef __FUNCT__
4128 #define __FUNCT__ "MatCreateAIJ"
4129 /*@C
4130    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4131    (the default parallel PETSc format).  For good matrix assembly performance
4132    the user should preallocate the matrix storage by setting the parameters
4133    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4134    performance can be increased by more than a factor of 50.
4135 
4136    Collective on MPI_Comm
4137 
4138    Input Parameters:
4139 +  comm - MPI communicator
4140 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4141            This value should be the same as the local size used in creating the
4142            y vector for the matrix-vector product y = Ax.
4143 .  n - This value should be the same as the local size used in creating the
4144        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4145        calculated if N is given). For square matrices n is almost always m.
4146 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4147 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4148 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4149            (same value is used for all local rows)
4150 .  d_nnz - array containing the number of nonzeros in the various rows of the
4151            DIAGONAL portion of the local submatrix (possibly different for each row)
4152            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4153            The size of this array is equal to the number of local rows, i.e., 'm'.
4154 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4155            submatrix (same value is used for all local rows).
4156 -  o_nnz - array containing the number of nonzeros in the various rows of the
4157            OFF-DIAGONAL portion of the local submatrix (possibly different for
4158            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4159            structure. The size of this array is equal to the number
4160            of local rows, i.e., 'm'.
4161 
4162    Output Parameter:
4163 .  A - the matrix
4164 
4165    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4166    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4167    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4168 
4169    Notes:
4170    If the *_nnz parameter is given then the *_nz parameter is ignored
4171 
4172    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4173    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4174    storage requirements for this matrix.
4175 
4176    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4177    processor then it must be used on all processors that share the object for
4178    that argument.
4179 
4180    The user MUST specify either the local or global matrix dimensions
4181    (possibly both).
4182 
4183    The parallel matrix is partitioned across processors such that the
4184    first m0 rows belong to process 0, the next m1 rows belong to
4185    process 1, the next m2 rows belong to process 2, etc., where
4186    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4187    values corresponding to an [m x N] submatrix.
4188 
4189    The columns are logically partitioned with the n0 columns belonging
4190    to the 0th partition, the next n1 columns belonging to the next
4191    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4192 
4193    The DIAGONAL portion of the local submatrix on any given processor
4194    is the submatrix defined by the rows and columns m,n owned by
4195    that processor, i.e., the diagonal submatrix on
4196    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4197    etc. The remaining portion of the local submatrix [m x (N-n)]
4198    constitutes the OFF-DIAGONAL portion. The example below better
4199    illustrates this concept.
4200 
4201    For a square global matrix we define each processor's diagonal portion
4202    to be its local rows and the corresponding columns (a square submatrix);
4203    each processor's off-diagonal portion encompasses the remainder of the
4204    local matrix (a rectangular submatrix).
4205 
4206    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4207 
4208    When calling this routine with a single process communicator, a matrix of
4209    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4210    type of communicator, use the construction mechanism:
4211      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4212 
4213    By default, this format uses inodes (identical nodes) when possible.
4214    We search for consecutive rows with the same nonzero structure, thereby
4215    reusing matrix information to achieve increased efficiency.
4216 
4217    Options Database Keys:
4218 +  -mat_no_inode  - Do not use inodes
4219 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4220 -  -mat_aij_oneindex - Internally use indexing starting at 1
4221         rather than 0.  Note that when calling MatSetValues(),
4222         the user still MUST index entries starting at 0!
4223 
4224 
4225    Example usage:
4226 
4227    Consider the following 8x8 matrix with 34 non-zero values that is
4228    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4229    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4230    as follows:
4231 
4232 .vb
4233             1  2  0  |  0  3  0  |  0  4
4234     Proc0   0  5  6  |  7  0  0  |  8  0
4235             9  0 10  | 11  0  0  | 12  0
4236     -------------------------------------
4237            13  0 14  | 15 16 17  |  0  0
4238     Proc1   0 18  0  | 19 20 21  |  0  0
4239             0  0  0  | 22 23  0  | 24  0
4240     -------------------------------------
4241     Proc2  25 26 27  |  0  0 28  | 29  0
4242            30  0  0  | 31 32 33  |  0 34
4243 .ve
4244 
4245    This can be represented as a collection of submatrices as:
4246 
4247 .vb
4248       A B C
4249       D E F
4250       G H I
4251 .ve
4252 
4253    where the submatrices A,B,C are owned by proc0, D,E,F are
4254    owned by proc1, and G,H,I are owned by proc2.
4255 
4256    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4257    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4258    The 'M','N' parameters are 8,8, and have the same values on all procs.
4259 
4260    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4261    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4262    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4263    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4264    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4265    matrix, and [DF] as another SeqAIJ matrix.
4266 
4267    When d_nz, o_nz parameters are specified, d_nz storage elements are
4268    allocated for every row of the local diagonal submatrix, and o_nz
4269    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4270    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4271    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4272    In this case, the values of d_nz,o_nz are:
4273 .vb
4274      proc0 : d_nz = 2, o_nz = 2
4275      proc1 : d_nz = 3, o_nz = 2
4276      proc2 : d_nz = 1, o_nz = 4
4277 .ve
4278    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4279    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4280    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4281    34 values.
4282 
4283    When the d_nnz, o_nnz parameters are specified, the storage is specified
4284    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4285    In the above case the values for d_nnz,o_nnz are:
4286 .vb
4287      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4288      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4289      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4290 .ve
4291    Here the space allocated is the sum of all the above values, i.e., 34, and
4292    hence the preallocation is perfect.
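
   As a minimal sketch (error checking omitted), the matrix above could also be created
   directly with uniform per-row estimates, where m is the number of locally owned rows
   and the counts 3 and 4 are the maxima of d_nz and o_nz over all processes:

.vb
     Mat A;
     MatCreateAIJ(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                  3,PETSC_NULL,4,PETSC_NULL,&A);
     /* ... MatSetValues() calls, then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve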
4293 
4294    Level: intermediate
4295 
4296 .keywords: matrix, aij, compressed row, sparse, parallel
4297 
4298 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4299           MPIAIJ, MatCreateMPIAIJWithArrays()
4300 @*/
4301 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4302 {
4303   PetscErrorCode ierr;
4304   PetscMPIInt    size;
4305 
4306   PetscFunctionBegin;
4307   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4308   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4309   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4310   if (size > 1) {
4311     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4312     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4313   } else {
4314     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4315     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4316   }
4317   PetscFunctionReturn(0);
4318 }
4319 
4320 #undef __FUNCT__
4321 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4322 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4323 {
4324   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4325 
4326   PetscFunctionBegin;
4327   *Ad     = a->A;
4328   *Ao     = a->B;
4329   *colmap = a->garray;
4330   PetscFunctionReturn(0);
4331 }
4332 
4333 #undef __FUNCT__
4334 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4335 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4336 {
4337   PetscErrorCode ierr;
4338   PetscInt       i;
4339   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4340 
4341   PetscFunctionBegin;
4342   if (coloring->ctype == IS_COLORING_GLOBAL) {
4343     ISColoringValue *allcolors,*colors;
4344     ISColoring      ocoloring;
4345 
4346     /* set coloring for diagonal portion */
4347     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4348 
4349     /* set coloring for off-diagonal portion */
4350     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4351     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4352     for (i=0; i<a->B->cmap->n; i++) {
4353       colors[i] = allcolors[a->garray[i]];
4354     }
4355     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4356     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4357     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4358     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4359   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4360     ISColoringValue *colors;
4361     PetscInt        *larray;
4362     ISColoring      ocoloring;
4363 
4364     /* set coloring for diagonal portion */
4365     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4366     for (i=0; i<a->A->cmap->n; i++) {
4367       larray[i] = i + A->cmap->rstart;
4368     }
4369     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4370     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4371     for (i=0; i<a->A->cmap->n; i++) {
4372       colors[i] = coloring->colors[larray[i]];
4373     }
4374     ierr = PetscFree(larray);CHKERRQ(ierr);
4375     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4376     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4377     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4378 
4379     /* set coloring for off-diagonal portion */
4380     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4381     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4382     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4383     for (i=0; i<a->B->cmap->n; i++) {
4384       colors[i] = coloring->colors[larray[i]];
4385     }
4386     ierr = PetscFree(larray);CHKERRQ(ierr);
4387     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4388     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4389     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4390   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4391 
4392   PetscFunctionReturn(0);
4393 }
4394 
4395 #undef __FUNCT__
4396 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4397 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4398 {
4399   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4400   PetscErrorCode ierr;
4401 
4402   PetscFunctionBegin;
4403   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4404   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 #undef __FUNCT__
4409 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4410 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4411 {
4412   PetscErrorCode ierr;
4413   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4414   PetscInt       *indx;
4415 
4416   PetscFunctionBegin;
4417   /* This routine will ONLY return MPIAIJ type matrix */
4418   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4419   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4420   if (n == PETSC_DECIDE){
4421     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4422   }
4423   /* Check sum(n) = N */
4424   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4425   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4426 
4427   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4428   rstart -= m;
4429 
4430   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4431   for (i=0;i<m;i++) {
4432     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4433     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4434     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4435   }
4436 
4437   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4438   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4439   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4440   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4441   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4442   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4443   PetscFunctionReturn(0);
4444 }
4445 
4446 #undef __FUNCT__
4447 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4448 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4449 {
4450   PetscErrorCode ierr;
4451   PetscInt       m,N,i,rstart,nnz,Ii;
4452   PetscInt       *indx;
4453   PetscScalar    *values;
4454 
4455   PetscFunctionBegin;
4456   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4457   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4458   for (i=0;i<m;i++) {
4459     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4460     Ii    = i + rstart;
4461     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4462     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4463   }
4464   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4465   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4466   PetscFunctionReturn(0);
4467 }
4468 
4469 #undef __FUNCT__
4470 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4471 /*@
4472       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4473                  matrices from each processor
4474 
4475     Collective on MPI_Comm
4476 
4477    Input Parameters:
4478 +    comm - the communicator the parallel matrix will live on
4479 .    inmat - the input sequential matrix on each process
4480 .    n - number of local columns (or PETSC_DECIDE)
4481 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4482 
4483    Output Parameter:
4484 .    outmat - the parallel matrix generated
4485 
4486     Level: advanced
4487 
4488    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4489 
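   A minimal usage sketch (assuming each process already holds a SeqAIJ matrix Aseq with
   the same number of columns; error checking omitted):

.vb
     Mat Ampi;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);
     /* ... after the values of Aseq change, refill the same parallel matrix ... */
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);
.ve
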
4490 @*/
4491 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4492 {
4493   PetscErrorCode ierr;
4494 
4495   PetscFunctionBegin;
4496   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4497   if (scall == MAT_INITIAL_MATRIX){
4498     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4499   }
4500   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4501   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4502   PetscFunctionReturn(0);
4503 }
4504 
4505 #undef __FUNCT__
4506 #define __FUNCT__ "MatFileSplit"
4507 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4508 {
4509   PetscErrorCode    ierr;
4510   PetscMPIInt       rank;
4511   PetscInt          m,N,i,rstart,nnz;
4512   size_t            len;
4513   const PetscInt    *indx;
4514   PetscViewer       out;
4515   char              *name;
4516   Mat               B;
4517   const PetscScalar *values;
4518 
4519   PetscFunctionBegin;
4520   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4521   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4522   /* Should this be the type of the diagonal block of A? */
4523   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4524   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4525   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4526   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4527   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4528   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4529   for (i=0;i<m;i++) {
4530     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4531     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4532     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4533   }
4534   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4535   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4536 
4537   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4538   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4539   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4540   sprintf(name,"%s.%d",outfile,rank);
4541   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4542   ierr = PetscFree(name);CHKERRQ(ierr);
4543   ierr = MatView(B,out);CHKERRQ(ierr);
4544   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4545   ierr = MatDestroy(&B);CHKERRQ(ierr);
4546   PetscFunctionReturn(0);
4547 }
4548 
4549 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4550 #undef __FUNCT__
4551 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4552 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4553 {
4554   PetscErrorCode       ierr;
4555   Mat_Merge_SeqsToMPI  *merge;
4556   PetscContainer       container;
4557 
4558   PetscFunctionBegin;
4559   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4560   if (container) {
4561     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4562     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4563     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4564     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4565     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4566     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4567     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4568     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4569     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4570     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4571     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4572     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4573     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4574     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4575     ierr = PetscFree(merge);CHKERRQ(ierr);
4576     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4577   }
4578   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4579   PetscFunctionReturn(0);
4580 }
4581 
4582 #include <../src/mat/utils/freespace.h>
4583 #include <petscbt.h>
4584 
4585 #undef __FUNCT__
4586 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4587 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4588 {
4589   PetscErrorCode       ierr;
4590   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4591   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4592   PetscMPIInt          size,rank,taga,*len_s;
4593   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4594   PetscInt             proc,m;
4595   PetscInt             **buf_ri,**buf_rj;
4596   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4597   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4598   MPI_Request          *s_waits,*r_waits;
4599   MPI_Status           *status;
4600   MatScalar            *aa=a->a;
4601   MatScalar            **abuf_r,*ba_i;
4602   Mat_Merge_SeqsToMPI  *merge;
4603   PetscContainer       container;
4604 
4605   PetscFunctionBegin;
4606   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4607 
4608   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4609   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4610 
4611   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4612   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4613 
4614   bi     = merge->bi;
4615   bj     = merge->bj;
4616   buf_ri = merge->buf_ri;
4617   buf_rj = merge->buf_rj;
4618 
4619   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4620   owners = merge->rowmap->range;
4621   len_s  = merge->len_s;
4622 
4623   /* send and recv matrix values */
4624   /*-----------------------------*/
4625   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4626   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4627 
4628   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4629   for (proc=0,k=0; proc<size; proc++){
4630     if (!len_s[proc]) continue;
4631     i = owners[proc];
4632     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4633     k++;
4634   }
4635 
4636   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4637   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4638   ierr = PetscFree(status);CHKERRQ(ierr);
4639 
4640   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4641   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4642 
4643   /* insert mat values of mpimat */
4644   /*----------------------------*/
4645   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4646   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4647 
4648   for (k=0; k<merge->nrecv; k++){
4649     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4650     nrows = *(buf_ri_k[k]);
4651     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4652     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4653   }
4654 
4655   /* set values of ba */
4656   m = merge->rowmap->n;
4657   for (i=0; i<m; i++) {
4658     arow = owners[rank] + i;
4659     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4660     bnzi = bi[i+1] - bi[i];
4661     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4662 
4663     /* add local non-zero vals of this proc's seqmat into ba */
4664     anzi = ai[arow+1] - ai[arow];
4665     aj   = a->j + ai[arow];
4666     aa   = a->a + ai[arow];
4667     nextaj = 0;
4668     for (j=0; nextaj<anzi; j++){
4669       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4670         ba_i[j] += aa[nextaj++];
4671       }
4672     }
4673 
4674     /* add received vals into ba */
4675     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4676       /* i-th row */
4677       if (i == *nextrow[k]) {
4678         anzi = *(nextai[k]+1) - *nextai[k];
4679         aj   = buf_rj[k] + *(nextai[k]);
4680         aa   = abuf_r[k] + *(nextai[k]);
4681         nextaj = 0;
4682         for (j=0; nextaj<anzi; j++){
4683           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4684             ba_i[j] += aa[nextaj++];
4685           }
4686         }
4687         nextrow[k]++; nextai[k]++;
4688       }
4689     }
4690     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4691   }
4692   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4693   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4694 
4695   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4696   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4697   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4698   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4699   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4700   PetscFunctionReturn(0);
4701 }
4702 
4703 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4704 
4705 #undef __FUNCT__
4706 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4707 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4708 {
4709   PetscErrorCode       ierr;
4710   Mat                  B_mpi;
4711   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4712   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4713   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4714   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4715   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4716   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4717   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4718   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4719   MPI_Status           *status;
4720   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4721   PetscBT              lnkbt;
4722   Mat_Merge_SeqsToMPI  *merge;
4723   PetscContainer       container;
4724 
4725   PetscFunctionBegin;
4726   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4727 
4728   /* make sure it is a PETSc comm */
4729   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4730   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4731   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4732 
4733   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4734   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4735 
4736   /* determine row ownership */
4737   /*---------------------------------------------------------*/
4738   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4739   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4740   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4741   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4742   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4743   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4744   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4745 
4746   m      = merge->rowmap->n;
4747   owners = merge->rowmap->range;
4748 
4749   /* determine the number of messages to send, their lengths */
4750   /*---------------------------------------------------------*/
4751   len_s  = merge->len_s;
4752 
4753   len = 0;  /* length of buf_si[] */
4754   merge->nsend = 0;
4755   for (proc=0; proc<size; proc++){
4756     len_si[proc] = 0;
4757     if (proc == rank){
4758       len_s[proc] = 0;
4759     } else {
4760       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4761       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4762     }
4763     if (len_s[proc]) {
4764       merge->nsend++;
4765       nrows = 0;
4766       for (i=owners[proc]; i<owners[proc+1]; i++){
4767         if (ai[i+1] > ai[i]) nrows++;
4768       }
4769       len_si[proc] = 2*(nrows+1);
4770       len += len_si[proc];
4771     }
4772   }
4773 
4774   /* determine the number and length of messages to receive for ij-structure */
4775   /*-------------------------------------------------------------------------*/
4776   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4777   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4778 
4779   /* post the Irecv of j-structure */
4780   /*-------------------------------*/
4781   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4782   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4783 
4784   /* post the Isend of j-structure */
4785   /*--------------------------------*/
4786   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4787 
4788   for (proc=0, k=0; proc<size; proc++){
4789     if (!len_s[proc]) continue;
4790     i = owners[proc];
4791     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4792     k++;
4793   }
4794 
4795   /* receives and sends of j-structure are complete */
4796   /*------------------------------------------------*/
4797   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4798   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4799 
4800   /* send and recv i-structure */
4801   /*---------------------------*/
4802   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4803   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4804 
4805   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4806   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4807   for (proc=0,k=0; proc<size; proc++){
4808     if (!len_s[proc]) continue;
4809     /* form outgoing message for i-structure:
4810          buf_si[0]:                 nrows to be sent
4811                [1:nrows]:           row index (global)
4812                [nrows+1:2*nrows+1]: i-structure index
4813     */
4814     /*-------------------------------------------*/
4815     nrows = len_si[proc]/2 - 1;
4816     buf_si_i    = buf_si + nrows+1;
4817     buf_si[0]   = nrows;
4818     buf_si_i[0] = 0;
4819     nrows = 0;
4820     for (i=owners[proc]; i<owners[proc+1]; i++){
4821       anzi = ai[i+1] - ai[i];
4822       if (anzi) {
4823         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4824         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4825         nrows++;
4826       }
4827     }
4828     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4829     k++;
4830     buf_si += len_si[proc];
4831   }
4832 
4833   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4834   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4835 
4836   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4837   for (i=0; i<merge->nrecv; i++){
4838     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4839   }
4840 
4841   ierr = PetscFree(len_si);CHKERRQ(ierr);
4842   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4843   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4844   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4845   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4846   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4847   ierr = PetscFree(status);CHKERRQ(ierr);
4848 
4849   /* compute a local seq matrix in each processor */
4850   /*----------------------------------------------*/
4851   /* allocate bi array and free space for accumulating nonzero column info */
4852   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4853   bi[0] = 0;
4854 
4855   /* create and initialize a linked list */
4856   nlnk = N+1;
4857   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4858 
4859   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4860   len  = ai[owners[rank+1]] - ai[owners[rank]];
4861   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4862   current_space = free_space;
4863 
4864   /* determine symbolic info for each local row */
4865   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4866 
4867   for (k=0; k<merge->nrecv; k++){
4868     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4869     nrows = *buf_ri_k[k];
4870     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4871     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4872   }
4873 
4874   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4875   len = 0;
4876   for (i=0;i<m;i++) {
4877     bnzi   = 0;
4878     /* add local non-zero cols of this proc's seqmat into lnk */
4879     arow   = owners[rank] + i;
4880     anzi   = ai[arow+1] - ai[arow];
4881     aj     = a->j + ai[arow];
4882     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4883     bnzi += nlnk;
4884     /* add received col data into lnk */
4885     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4886       if (i == *nextrow[k]) { /* i-th row */
4887         anzi = *(nextai[k]+1) - *nextai[k];
4888         aj   = buf_rj[k] + *nextai[k];
4889         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4890         bnzi += nlnk;
4891         nextrow[k]++; nextai[k]++;
4892       }
4893     }
4894     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4895 
4896     /* if free space is not available, make more free space */
4897     if (current_space->local_remaining<bnzi) {
4898       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4899       nspacedouble++;
4900     }
4901     /* copy data into free space, then initialize lnk */
4902     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4903     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4904 
4905     current_space->array           += bnzi;
4906     current_space->local_used      += bnzi;
4907     current_space->local_remaining -= bnzi;
4908 
4909     bi[i+1] = bi[i] + bnzi;
4910   }
4911 
4912   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4913 
4914   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4915   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4916   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4917 
4918   /* create symbolic parallel matrix B_mpi */
4919   /*---------------------------------------*/
4920   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4921   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4922   if (n==PETSC_DECIDE) {
4923     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4924   } else {
4925     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4926   }
4927   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4928   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4929   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4930   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4931   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4932 
4933   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4934   B_mpi->assembled     = PETSC_FALSE;
4935   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4936   merge->bi            = bi;
4937   merge->bj            = bj;
4938   merge->buf_ri        = buf_ri;
4939   merge->buf_rj        = buf_rj;
4940   merge->coi           = PETSC_NULL;
4941   merge->coj           = PETSC_NULL;
4942   merge->owners_co     = PETSC_NULL;
4943 
4944   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4945 
4946   /* attach the supporting struct to B_mpi for reuse */
4947   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4948   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4949   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4950   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4951   *mpimat = B_mpi;
4952 
4953   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4954   PetscFunctionReturn(0);
4955 }
4956 
4957 #undef __FUNCT__
4958 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4959 /*@C
4960       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4961                  matrices from each processor
4962 
4963     Collective on MPI_Comm
4964 
4965    Input Parameters:
4966 +    comm - the communicator the parallel matrix will live on
4967 .    seqmat - the input sequential matrix on each process
4968 .    m - number of local rows (or PETSC_DECIDE)
4969 .    n - number of local columns (or PETSC_DECIDE)
4970 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4971 
4972    Output Parameter:
4973 .    mpimat - the parallel matrix generated
4974 
4975     Level: advanced
4976 
4977    Notes:
4978      The dimensions of the sequential matrix in each processor MUST be the same.
4979      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4980      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
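
   A minimal usage sketch (assuming each process holds a SeqAIJ matrix Aseq of identical
   dimensions; error checking omitted):

.vb
     Mat Ampi;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);
     /* ... after the values (but not the nonzero pattern) of Aseq change ... */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);
.ve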
4981 @*/
4982 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4983 {
4984   PetscErrorCode   ierr;
4985   PetscMPIInt     size;
4986 
4987   PetscFunctionBegin;
4988   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4989   if (size == 1){
4990      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4991      if (scall == MAT_INITIAL_MATRIX){
4992        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4993      } else {
4994        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4995      }
4996      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4997      PetscFunctionReturn(0);
4998   }
4999   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5000   if (scall == MAT_INITIAL_MATRIX){
5001     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5002   }
5003   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5004   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5005   PetscFunctionReturn(0);
5006 }
5007 
5008 #undef __FUNCT__
5009 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5010 /*@
5011      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5012           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
5013           with MatGetSize().
5014 
5015     Not Collective
5016 
5017    Input Parameters:
5018 +    A - the matrix
5019 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5020 
5021    Output Parameter:
5022 .    A_loc - the local sequential matrix generated
5023 
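   A minimal usage sketch (error checking omitted; here the caller is assumed to destroy
   A_loc once it is no longer needed):

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     /* ... use Aloc as an ordinary SeqAIJ matrix ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);   /* refresh values after A changes */
     MatDestroy(&Aloc);
.ve
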
5024     Level: developer
5025 
5026 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5027 
5028 @*/
5029 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5030 {
5031   PetscErrorCode  ierr;
5032   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5033   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5034   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5035   MatScalar       *aa=a->a,*ba=b->a,*cam;
5036   PetscScalar     *ca;
5037   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5038   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5039   PetscBool       match;
5040 
5041   PetscFunctionBegin;
5042   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5043   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5044   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5045   if (scall == MAT_INITIAL_MATRIX){
5046     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5047     ci[0] = 0;
5048     for (i=0; i<am; i++){
5049       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5050     }
5051     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5052     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5053     k = 0;
5054     for (i=0; i<am; i++) {
5055       ncols_o = bi[i+1] - bi[i];
5056       ncols_d = ai[i+1] - ai[i];
5057       /* off-diagonal portion of A */
5058       for (jo=0; jo<ncols_o; jo++) {
5059         col = cmap[*bj];
5060         if (col >= cstart) break;
5061         cj[k]   = col; bj++;
5062         ca[k++] = *ba++;
5063       }
5064       /* diagonal portion of A */
5065       for (j=0; j<ncols_d; j++) {
5066         cj[k]   = cstart + *aj++;
5067         ca[k++] = *aa++;
5068       }
5069       /* off-diagonal portion of A */
5070       for (j=jo; j<ncols_o; j++) {
5071         cj[k]   = cmap[*bj++];
5072         ca[k++] = *ba++;
5073       }
5074     }
5075     /* put together the new matrix */
5076     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5077     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5078     /* Since these are PETSc arrays, change flags to free them as necessary. */
5079     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5080     mat->free_a  = PETSC_TRUE;
5081     mat->free_ij = PETSC_TRUE;
5082     mat->nonew   = 0;
5083   } else if (scall == MAT_REUSE_MATRIX){
5084     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5085     ci = mat->i; cj = mat->j; cam = mat->a;
5086     for (i=0; i<am; i++) {
5087       /* off-diagonal portion of A */
5088       ncols_o = bi[i+1] - bi[i];
5089       for (jo=0; jo<ncols_o; jo++) {
5090         col = cmap[*bj];
5091         if (col >= cstart) break;
5092         *cam++ = *ba++; bj++;
5093       }
5094       /* diagonal portion of A */
5095       ncols_d = ai[i+1] - ai[i];
5096       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5097       /* off-diagonal portion of A */
5098       for (j=jo; j<ncols_o; j++) {
5099         *cam++ = *ba++; bj++;
5100       }
5101     }
5102   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5103   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5104   PetscFunctionReturn(0);
5105 }
5106 
5107 #undef __FUNCT__
5108 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5109 /*@C
5110      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5111 
5112     Not Collective
5113 
5114    Input Parameters:
5115 +    A - the matrix
5116 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5117 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5118 
5119    Output Parameter:
5120 .    A_loc - the local sequential matrix generated
5121 
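   A minimal usage sketch (error checking omitted; passing PETSC_NULL for row and col lets
   the routine build the index sets itself):

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);
.ve
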
5122     Level: developer
5123 
5124 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5125 
5126 @*/
5127 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5128 {
5129   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5130   PetscErrorCode    ierr;
5131   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5132   IS                isrowa,iscola;
5133   Mat               *aloc;
5134   PetscBool       match;
5135 
5136   PetscFunctionBegin;
5137   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5138   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5139   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5140   if (!row){
5141     start = A->rmap->rstart; end = A->rmap->rend;
5142     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5143   } else {
5144     isrowa = *row;
5145   }
5146   if (!col){
5147     start = A->cmap->rstart;
5148     cmap  = a->garray;
5149     nzA   = a->A->cmap->n;
5150     nzB   = a->B->cmap->n;
5151     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5152     ncols = 0;
5153     for (i=0; i<nzB; i++) {
5154       if (cmap[i] < start) idx[ncols++] = cmap[i];
5155       else break;
5156     }
5157     imark = i;
5158     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5159     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5160     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5161   } else {
5162     iscola = *col;
5163   }
5164   if (scall != MAT_INITIAL_MATRIX){
5165     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5166     aloc[0] = *A_loc;
5167   }
5168   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5169   *A_loc = aloc[0];
5170   ierr = PetscFree(aloc);CHKERRQ(ierr);
5171   if (!row){
5172     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5173   }
5174   if (!col){
5175     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5176   }
5177   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5178   PetscFunctionReturn(0);
5179 }
5180 
5181 #undef __FUNCT__
5182 #define __FUNCT__ "MatGetBrowsOfAcols"
5183 /*@C
5184     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5185 
5186     Collective on Mat
5187 
5188    Input Parameters:
5189 +    A,B - the matrices in mpiaij format
5190 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5191 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5192 
5193    Output Parameter:
5194 +    rowb, colb - index sets of rows and columns of B to extract
5195 -    B_seq - the sequential matrix generated
5196 
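   A minimal usage sketch (error checking omitted; with MAT_REUSE_MATRIX the index sets
   obtained from the first call must be passed back in):

.vb
     IS  rowb,colb;
     Mat Bseq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
     /* ... later, refresh Bseq using the same index sets ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
.ve
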
5197     Level: developer
5198 
5199 @*/
5200 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5201 {
5202   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5203   PetscErrorCode    ierr;
5204   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5205   IS                isrowb,iscolb;
5206   Mat               *bseq=PETSC_NULL;
5207 
5208   PetscFunctionBegin;
5209   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5210     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5211   }
5212   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5213 
5214   if (scall == MAT_INITIAL_MATRIX){
5215     start = A->cmap->rstart;
5216     cmap  = a->garray;
5217     nzA   = a->A->cmap->n;
5218     nzB   = a->B->cmap->n;
5219     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5220     ncols = 0;
5221     for (i=0; i<nzB; i++) {  /* row < local row index */
5222       if (cmap[i] < start) idx[ncols++] = cmap[i];
5223       else break;
5224     }
5225     imark = i;
5226     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5227     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5228     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5229     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5230   } else {
5231     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5232     isrowb = *rowb; iscolb = *colb;
5233     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5234     bseq[0] = *B_seq;
5235   }
5236   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5237   *B_seq = bseq[0];
5238   ierr = PetscFree(bseq);CHKERRQ(ierr);
5239   if (!rowb){
5240     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5241   } else {
5242     *rowb = isrowb;
5243   }
5244   if (!colb){
5245     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5246   } else {
5247     *colb = iscolb;
5248   }
5249   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5250   PetscFunctionReturn(0);
5251 }
5252 
5253 #undef __FUNCT__
5254 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5255 /*
5256     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5257     of the OFF-DIAGONAL portion of local A
5258 
5259     Collective on Mat
5260 
5261    Input Parameters:
5262 +    A,B - the matrices in MATMPIAIJ format
5263 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5264 
5265    Output Parameters:
5266 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5267 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5268 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5269 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5270 
5271     Level: developer
5272 
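   Sample usage (a minimal sketch, not taken from an existing caller; it assumes A and B are
   assembled MATMPIAIJ matrices with compatible layouts and abbreviates error checking):

     PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa      = PETSC_NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... later, when only the numerical values of B have changed ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
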
5273 */
5274 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5275 {
5276   VecScatter_MPI_General *gen_to,*gen_from;
5277   PetscErrorCode         ierr;
5278   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5279   Mat_SeqAIJ             *b_oth;
5280   VecScatter             ctx=a->Mvctx;
5281   MPI_Comm               comm=((PetscObject)ctx)->comm;
5282   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5283   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5284   PetscScalar            *rvalues,*svalues;
5285   MatScalar              *b_otha,*bufa,*bufA;
5286   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5287   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5288   MPI_Status             *sstatus,rstatus;
5289   PetscMPIInt            jj;
5290   PetscInt               *cols,sbs,rbs;
5291   PetscScalar            *vals;
5292 
5293   PetscFunctionBegin;
5294   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5295     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5296   }
5297   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5298   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5299 
5300   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5301   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5302   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
5303   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
5304   nrecvs   = gen_from->n;
5305   nsends   = gen_to->n;
5306 
5307   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5308   srow     = gen_to->indices;   /* local row index to be sent */
5309   sstarts  = gen_to->starts;
5310   sprocs   = gen_to->procs;
5311   sstatus  = gen_to->sstatus;
5312   sbs      = gen_to->bs;
5313   rstarts  = gen_from->starts;
5314   rprocs   = gen_from->procs;
5315   rbs      = gen_from->bs;
5316 
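  /* if the caller does not supply the reuse buffers, everything must be rebuilt from scratch */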
5317   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5318   if (scall == MAT_INITIAL_MATRIX){
5319     /* i-array */
5320     /*---------*/
5321     /*  post receives */
5322     for (i=0; i<nrecvs; i++){
5323       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5324       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5325       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5326     }
5327 
5328     /* pack the outgoing message */
5329     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5330     sstartsj[0] = 0;  rstartsj[0] = 0;
5331     len = 0; /* total length of j or a array to be sent */
5332     k = 0;
5333     for (i=0; i<nsends; i++){
5334       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5335       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5336       for (j=0; j<nrows; j++) {
5337         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5338         for (l=0; l<sbs; l++){
5339           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5340           rowlen[j*sbs+l] = ncols;
5341           len += ncols;
5342           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5343         }
5344         k++;
5345       }
5346       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5347       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5348     }
5349     /* recvs and sends of i-array are completed */
5350     i = nrecvs;
5351     while (i--) {
5352       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5353     }
5354     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5355 
5356     /* allocate buffers for sending j and a arrays */
5357     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5358     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5359 
5360     /* create i-array of B_oth */
5361     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5362     b_othi[0] = 0;
5363     len = 0; /* total length of j or a array to be received */
5364     k = 0;
5365     for (i=0; i<nrecvs; i++){
5366       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5367       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5368       for (j=0; j<nrows; j++) {
5369         b_othi[k+1] = b_othi[k] + rowlen[j];
5370         len += rowlen[j]; k++;
5371       }
5372       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5373     }
5374 
5375     /* allocate space for j and a arrays of B_oth */
5376     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5377     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5378 
5379     /* j-array */
5380     /*---------*/
5381     /*  post receives of j-array */
5382     for (i=0; i<nrecvs; i++){
5383       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5384       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5385     }
5386 
5387     /* pack the outgoing message j-array */
5388     k = 0;
5389     for (i=0; i<nsends; i++){
5390       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5391       bufJ = bufj+sstartsj[i];
5392       for (j=0; j<nrows; j++) {
5393         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5394         for (ll=0; ll<sbs; ll++){
5395           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5396           for (l=0; l<ncols; l++){
5397             *bufJ++ = cols[l];
5398           }
5399           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5400         }
5401       }
5402       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5403     }
5404 
5405     /* recvs and sends of j-array are completed */
5406     i = nrecvs;
5407     while (i--) {
5408       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5409     }
5410     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5411   } else if (scall == MAT_REUSE_MATRIX){
5412     sstartsj = *startsj_s;
5413     rstartsj = *startsj_r;
5414     bufa     = *bufa_ptr;
5415     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5416     b_otha   = b_oth->a;
5417   } else {
5418     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5419   }
5420 
5421   /* a-array */
5422   /*---------*/
5423   /*  post receives of a-array */
5424   for (i=0; i<nrecvs; i++){
5425     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5426     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5427   }
5428 
5429   /* pack the outgoing message a-array */
5430   k = 0;
5431   for (i=0; i<nsends; i++){
5432     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5433     bufA = bufa+sstartsj[i];
5434     for (j=0; j<nrows; j++) {
5435       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5436       for (ll=0; ll<sbs; ll++){
5437         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5438         for (l=0; l<ncols; l++){
5439           *bufA++ = vals[l];
5440         }
5441         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5442       }
5443     }
5444     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5445   }
5446   /* recvs and sends of a-array are completed */
5447   i = nrecvs;
5448   while (i--) {
5449     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5450   }
5451   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5452   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5453 
5454   if (scall == MAT_INITIAL_MATRIX){
5455     /* put together the new matrix */
5456     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5457 
5458     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5459     /* Since these are PETSc arrays, change flags to free them as necessary. */
5460     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5461     b_oth->free_a  = PETSC_TRUE;
5462     b_oth->free_ij = PETSC_TRUE;
5463     b_oth->nonew   = 0;
5464 
5465     ierr = PetscFree(bufj);CHKERRQ(ierr);
5466     if (!startsj_s || !bufa_ptr){
5467       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5468       ierr = PetscFree(bufa);CHKERRQ(ierr);
5469     } else {
5470       *startsj_s = sstartsj;
5471       *startsj_r = rstartsj;
5472       *bufa_ptr  = bufa;
5473     }
5474   }
5475   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5476   PetscFunctionReturn(0);
5477 }
5478 
5479 #undef __FUNCT__
5480 #define __FUNCT__ "MatGetCommunicationStructs"
5481 /*@C
5482   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5483 
5484   Not Collective
5485 
5486   Input Parameter:
5487 . A - The matrix of type MATMPIAIJ
5488 
5489   Output Parameters:
5490 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5491 . colmap - A map from global column index to local index into lvec
5492 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5493 
5494   Level: developer
5495 
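  Sample usage (a minimal sketch, not taken from an existing example; it assumes A is an assembled
  MATMPIAIJ matrix and abbreviates error checking):
.vb
  Vec        lvec;
  VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif

  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
  The returned objects are owned by the matrix and must not be destroyed by the caller.
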
5496 @*/
5497 #if defined (PETSC_USE_CTABLE)
5498 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5499 #else
5500 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5501 #endif
5502 {
5503   Mat_MPIAIJ *a;
5504 
5505   PetscFunctionBegin;
5506   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5507   PetscValidPointer(lvec, 2);
5508   PetscValidPointer(colmap, 3);
5509   PetscValidPointer(multScatter, 4);
5510   a = (Mat_MPIAIJ *) A->data;
5511   if (lvec) *lvec = a->lvec;
5512   if (colmap) *colmap = a->colmap;
5513   if (multScatter) *multScatter = a->Mvctx;
5514   PetscFunctionReturn(0);
5515 }
5516 
5517 EXTERN_C_BEGIN
5518 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5519 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5520 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5521 EXTERN_C_END
5522 
5523 #undef __FUNCT__
5524 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5525 /*
5526     Computes C = A*B as (B'*A')', since computing A*B directly (MPIDense times MPIAIJ) is untenable
5527 
5528                n                       p                          p
5529         (              )       (              )         (                  )
5530       m (      A       )  *  n (       B      )   =   m (         C        )
5531         (              )       (              )         (                  )
5532 
5533 */
5534 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5535 {
5536   PetscErrorCode     ierr;
5537   Mat                At,Bt,Ct;
5538 
5539   PetscFunctionBegin;
5540   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5541   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5542   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5543   ierr = MatDestroy(&At);CHKERRQ(ierr);
5544   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5545   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5546   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5547   PetscFunctionReturn(0);
5548 }
5549 
5550 #undef __FUNCT__
5551 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5552 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5553 {
5554   PetscErrorCode ierr;
5555   PetscInt       m=A->rmap->n,n=B->cmap->n;
5556   Mat            Cmat;
5557 
5558   PetscFunctionBegin;
5559   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5560   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5561   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5562   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5563   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5564   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5565   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5566   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5567 
5568   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5569   *C = Cmat;
5570   PetscFunctionReturn(0);
5571 }
5572 
5573 /* ----------------------------------------------------------------*/
5574 #undef __FUNCT__
5575 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5576 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5577 {
5578   PetscErrorCode ierr;
5579 
5580   PetscFunctionBegin;
5581   if (scall == MAT_INITIAL_MATRIX){
5582     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5583   }
5584   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5585   PetscFunctionReturn(0);
5586 }
5587 
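/*
   A minimal usage sketch (not taken from an existing example): with A an assembled MATMPIDENSE
   matrix and B an assembled MATMPIAIJ matrix satisfying A->cmap->n == B->rmap->n, the dense
   product C = A*B is obtained through the routines above via

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   MatMatMult_MPIDense_MPIAIJ() is composed on the MPIAIJ matrix as "MatMatMult_mpidense_mpiaij_C"
   in MatCreate_MPIAIJ() below so that the generic MatMatMult() interface can find it.
*/
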
5588 EXTERN_C_BEGIN
5589 #if defined(PETSC_HAVE_MUMPS)
5590 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5591 #endif
5592 #if defined(PETSC_HAVE_PASTIX)
5593 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5594 #endif
5595 #if defined(PETSC_HAVE_SUPERLU_DIST)
5596 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5597 #endif
5598 #if defined(PETSC_HAVE_CLIQUE)
5599 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5600 #endif
5601 EXTERN_C_END
5602 
5603 /*MC
5604    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5605 
5606    Options Database Keys:
5607 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5608 
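  Example usage (a minimal sketch; M, N and the preallocation counts are placeholders):
.vb
   Mat A;

   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
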
5609   Level: beginner
5610 
5611 .seealso: MatCreateAIJ()
5612 M*/
5613 
5614 EXTERN_C_BEGIN
5615 #undef __FUNCT__
5616 #define __FUNCT__ "MatCreate_MPIAIJ"
5617 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5618 {
5619   Mat_MPIAIJ     *b;
5620   PetscErrorCode ierr;
5621   PetscMPIInt    size;
5622 
5623   PetscFunctionBegin;
5624   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5625   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5626   B->data         = (void*)b;
5627   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5628   B->assembled    = PETSC_FALSE;
5629   B->insertmode   = NOT_SET_VALUES;
5630   b->size         = size;
5631   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5632 
5633   /* build cache for off array entries formed */
5634   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5635   b->donotstash  = PETSC_FALSE;
5636   b->colmap      = 0;
5637   b->garray      = 0;
5638   b->roworiented = PETSC_TRUE;
5639 
5640   /* stuff used for matrix vector multiply */
5641   b->lvec      = PETSC_NULL;
5642   b->Mvctx     = PETSC_NULL;
5643 
5644   /* stuff for MatGetRow() */
5645   b->rowindices   = 0;
5646   b->rowvalues    = 0;
5647   b->getrowactive = PETSC_FALSE;
5648 
5649   /* flexible pointer used in CUSP/CUSPARSE classes */
5650   b->spptr        = PETSC_NULL;
5651 
5652 #if defined(PETSC_HAVE_MUMPS)
5653   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5654                                      "MatGetFactor_aij_mumps",
5655                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5656 #endif
5657 #if defined(PETSC_HAVE_PASTIX)
5658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5659 					   "MatGetFactor_mpiaij_pastix",
5660 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5661 #endif
5662 #if defined(PETSC_HAVE_SUPERLU_DIST)
5663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5664                                      "MatGetFactor_mpiaij_superlu_dist",
5665                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5666 #endif
5667 #if defined(PETSC_HAVE_CLIQUE)
5668   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5669                                      "MatGetFactor_aij_clique",
5670                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5671 #endif
5672   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5673                                      "MatStoreValues_MPIAIJ",
5674                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5676                                      "MatRetrieveValues_MPIAIJ",
5677                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5678   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5679 				     "MatGetDiagonalBlock_MPIAIJ",
5680                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5681   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5682 				     "MatIsTranspose_MPIAIJ",
5683 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5685 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5686 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5688 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5689 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5691 				     "MatDiagonalScaleLocal_MPIAIJ",
5692 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5693   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5694                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5695                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5697                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5698                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5699   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5700                                      "MatConvert_MPIAIJ_MPISBAIJ",
5701                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5703                                      "MatMatMult_MPIDense_MPIAIJ",
5704                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5705   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5706                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5707                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5708   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5709                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5710                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5711   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5712   PetscFunctionReturn(0);
5713 }
5714 EXTERN_C_END
5715 
5716 #undef __FUNCT__
5717 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5718 /*@
5719      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5720          and "off-diagonal" part of the matrix in CSR format.
5721 
5722    Collective on MPI_Comm
5723 
5724    Input Parameters:
5725 +  comm - MPI communicator
5726 .  m - number of local rows (Cannot be PETSC_DECIDE)
5727 .  n - This value should be the same as the local size used in creating the
5728        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5729        calculated if N is given). For square matrices n is almost always m.
5730 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5731 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5732 .   i - row indices for "diagonal" portion of matrix
5733 .   j - column indices
5734 .   a - matrix values
5735 .   oi - row indices for "off-diagonal" portion of matrix
5736 .   oj - column indices
5737 -   oa - matrix values
5738 
5739    Output Parameter:
5740 .   mat - the matrix
5741 
5742    Level: advanced
5743 
5744    Notes:
5745        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5746        must free the arrays once the matrix has been destroyed and not before.
5747 
5748        The i and j indices are 0 based
5749 
5750        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5751 
5752        This sets local rows and cannot be used to set off-processor values.
5753 
5754        You cannot later use MatSetValues() to change values in this matrix.
5755 
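       Example usage (a minimal sketch, not taken from an existing example; rank is the result of
       MPI_Comm_rank() on PETSC_COMM_WORLD and error checking is abbreviated). On two processes,
       one row per process, the 2x2 global matrix with 2 on the diagonal and -1 off the diagonal
       can be built as
.vb
       PetscInt    i[]  = {0,1},  j[]  = {0};
       PetscInt    oi[] = {0,1},  oj[1];
       PetscScalar a[]  = {2.0},  oa[] = {-1.0};
       Mat         A;

       oj[0] = rank ? 0 : 1;
       ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
       Here the column indices j of the "diagonal" portion are local to that block, while the column
       indices oj of the "off-diagonal" portion are global, matching how the arrays are handed to the
       underlying sequential matrices. The arrays must remain valid until A has been destroyed.
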
5756 .keywords: matrix, aij, compressed row, sparse, parallel
5757 
5758 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5759           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5760 @*/
5761 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5762 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5763 {
5764   PetscErrorCode ierr;
5765   Mat_MPIAIJ     *maij;
5766 
5767   PetscFunctionBegin;
5768   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5769   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5770   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5771   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5772   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5773   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5774   maij = (Mat_MPIAIJ*) (*mat)->data;
5775   maij->donotstash     = PETSC_TRUE;
5776   (*mat)->preallocated = PETSC_TRUE;
5777 
5778   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5779   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5780 
5781   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5782   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5783 
5784   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5785   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5786   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5787   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5788 
5789   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5790   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5791   PetscFunctionReturn(0);
5792 }
5793 
5794 /*
5795     Special version of MatSetValues() for direct calls from Fortran, avoiding the overhead of the standard Fortran interface
5796 */
5797 #include <petsc-private/fortranimpl.h>
5798 
5799 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5800 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5801 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5802 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5803 #endif
5804 
5805 /* Change these macros so they can be used in a void function */
5806 #undef CHKERRQ
5807 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5808 #undef SETERRQ2
5809 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5810 #undef SETERRQ3
5811 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5812 #undef SETERRQ
5813 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5814 
5815 EXTERN_C_BEGIN
5816 #undef __FUNCT__
5817 #define __FUNCT__ "matsetvaluesmpiaij_"
5818 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5819 {
5820   Mat             mat = *mmat;
5821   PetscInt        m = *mm, n = *mn;
5822   InsertMode      addv = *maddv;
5823   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5824   PetscScalar     value;
5825   PetscErrorCode  ierr;
5826 
5827   MatCheckPreallocated(mat,1);
5828   if (mat->insertmode == NOT_SET_VALUES) {
5829     mat->insertmode = addv;
5830   }
5831 #if defined(PETSC_USE_DEBUG)
5832   else if (mat->insertmode != addv) {
5833     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5834   }
5835 #endif
5836   {
5837   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5838   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5839   PetscBool       roworiented = aij->roworiented;
5840 
5841   /* Some Variables required in the macro */
5842   Mat             A = aij->A;
5843   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5844   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5845   MatScalar       *aa = a->a;
5846   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5847   Mat             B = aij->B;
5848   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5849   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5850   MatScalar       *ba = b->a;
5851 
5852   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5853   PetscInt        nonew = a->nonew;
5854   MatScalar       *ap1,*ap2;
5855 
5856   PetscFunctionBegin;
5857   for (i=0; i<m; i++) {
5858     if (im[i] < 0) continue;
5859 #if defined(PETSC_USE_DEBUG)
5860     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5861 #endif
5862     if (im[i] >= rstart && im[i] < rend) {
5863       row      = im[i] - rstart;
5864       lastcol1 = -1;
5865       rp1      = aj + ai[row];
5866       ap1      = aa + ai[row];
5867       rmax1    = aimax[row];
5868       nrow1    = ailen[row];
5869       low1     = 0;
5870       high1    = nrow1;
5871       lastcol2 = -1;
5872       rp2      = bj + bi[row];
5873       ap2      = ba + bi[row];
5874       rmax2    = bimax[row];
5875       nrow2    = bilen[row];
5876       low2     = 0;
5877       high2    = nrow2;
5878 
5879       for (j=0; j<n; j++) {
5880         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5881         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5882         if (in[j] >= cstart && in[j] < cend){
5883           col = in[j] - cstart;
5884           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5885         } else if (in[j] < 0) continue;
5886 #if defined(PETSC_USE_DEBUG)
5887         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5888 #endif
5889         else {
5890           if (mat->was_assembled) {
5891             if (!aij->colmap) {
5892               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5893             }
5894 #if defined (PETSC_USE_CTABLE)
5895             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5896 	    col--;
5897 #else
5898             col = aij->colmap[in[j]] - 1;
5899 #endif
5900             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5901               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5902               col =  in[j];
5903               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5904               B = aij->B;
5905               b = (Mat_SeqAIJ*)B->data;
5906               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5907               rp2      = bj + bi[row];
5908               ap2      = ba + bi[row];
5909               rmax2    = bimax[row];
5910               nrow2    = bilen[row];
5911               low2     = 0;
5912               high2    = nrow2;
5913               bm       = aij->B->rmap->n;
5914               ba = b->a;
5915             }
5916           } else col = in[j];
5917           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5918         }
5919       }
5920     } else {
5921       if (!aij->donotstash) {
5922         if (roworiented) {
5923           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5924         } else {
5925           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5926         }
5927       }
5928     }
5929   }}
5930   PetscFunctionReturnVoid();
5931 }
5932 EXTERN_C_END
5933 
5934