xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 009bbdc485cd9ad46be9940d3549e2dde9cdc322)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ type also automatically
18    switches over to use inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
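/*
   A minimal usage sketch, not taken from this file (the global sizes M and N and the per-row
   nonzero estimates 5 and 2 are hypothetical).  Calling both preallocation routines, as
   recommended above, keeps the same code correct on one-process and multi-process
   communicators, since the routine that does not match the matrix's actual type is ignored:

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/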
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
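/*
   A minimal run-time selection sketch, not taken from this file (M and N are hypothetical
   global sizes); the type is picked up from the options database when the program is run
   with -mat_type aijcrl:

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/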
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
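/*
   A minimal calling sketch, not code from this file; it assumes gmat is the fully assembled
   MATSEQAIJ matrix held on process 0 and m is this process's desired number of local rows:

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... later, to push updated numerical values (same nonzero pattern) from process 0:
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/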
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the diagonal and off-diagonal nonzero counts for each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the diagonal and off-diagonal nonzero counts for each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267         for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270           jj++;
271         }
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
352 a slightly higher hash table cost; without it, it is not scalable (each processor
353 has an order-N integer array) but access is fast.
354 */
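/*
   Illustrative sketch with hypothetical numbers: if this process's off-diagonal block B
   references global columns garray[] = {3, 17, 42}, the routine below records the 1-based
   mapping 3 -> 1, 17 -> 2, 42 -> 3 (shifted by one so that a result of 0 can mean "column
   not present").  In the array (non-CTABLE) case, colmap[17] - 1 = 1 then recovers the local
   off-diagonal column of global column 17; the PETSC_USE_CTABLE case stores the same mapping
   in a hash table, with the keys also shifted by one.
*/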
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++) {
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
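/*
   The two macros below insert a single value into the diagonal (A) or off-diagonal (B) local
   SeqAIJ block: a short binary search narrows down the column range within the row, a linear
   scan locates the exact slot, and the value is either added (ADD_VALUES) or overwritten.  If
   the column is not yet present, a zero value may be dropped (ignorezeroentries), the insertion
   may be silently skipped or treated as an error depending on the nonew setting, and otherwise
   the row is grown via MatSeqXAIJReallocateAIJ() with the later entries shifted up to make room.
*/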
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1)  low1 = 0;     \
382     else                 high1 = nrow1; \
383     lastcol1 = col;\
384     while (high1-low1 > 5) { \
385       t = (low1+high1)/2; \
386       if (rp1[t] > col) high1 = t; \
387       else              low1  = t; \
388     } \
389       for (_i=low1; _i<high1; _i++) { \
390         if (rp1[_i] > col) break; \
391         if (rp1[_i] == col) { \
392           if (addv == ADD_VALUES) ap1[_i] += value;   \
393           else                    ap1[_i] = value; \
394           goto a_noinsert; \
395         } \
396       }  \
397       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
398       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
399       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
400       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
401       N = nrow1++ - 1; a->nz++; high1++; \
402       /* shift up all the later entries in this row */ \
403       for (ii=N; ii>=_i; ii--) { \
404         rp1[ii+1] = rp1[ii]; \
405         ap1[ii+1] = ap1[ii]; \
406       } \
407       rp1[_i] = col;  \
408       ap1[_i] = value;  \
409       a_noinsert: ; \
410       ailen[row] = nrow1; \
411 }
412 
413 
414 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
415 { \
416     if (col <= lastcol2) low2 = 0;                        \
417     else high2 = nrow2;                                   \
418     lastcol2 = col;                                       \
419     while (high2-low2 > 5) {                              \
420       t = (low2+high2)/2;                                 \
421       if (rp2[t] > col) high2 = t;                        \
422       else             low2  = t;                         \
423     }                                                     \
424     for (_i=low2; _i<high2; _i++) {                       \
425       if (rp2[_i] > col) break;                           \
426       if (rp2[_i] == col) {                               \
427         if (addv == ADD_VALUES) ap2[_i] += value;         \
428         else                    ap2[_i] = value;          \
429         goto b_noinsert;                                  \
430       }                                                   \
431     }                                                     \
432     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
433     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
434     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
435     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
436     N = nrow2++ - 1; b->nz++; high2++;                    \
437     /* shift up all the later entries in this row */      \
438     for (ii=N; ii>=_i; ii--) {                            \
439       rp2[ii+1] = rp2[ii];                                \
440       ap2[ii+1] = ap2[ii];                                \
441     }                                                     \
442     rp2[_i] = col;                                        \
443     ap2[_i] = value;                                      \
444     b_noinsert: ;                                         \
445     bilen[row] = nrow2;                                   \
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
450 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
451 {
452   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
453   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
454   PetscErrorCode ierr;
455   PetscInt       l,*garray = mat->garray,diag;
456 
457   PetscFunctionBegin;
458   /* code only works for square matrices A */
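  /* v[] is assumed to contain the entire owned row in global column order: the entries to the
     left of the diagonal block (stored in B), then the diagonal block itself (stored in A),
     then the entries to the right of the diagonal block (also stored in B) */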
459 
460   /* find size of row to the left of the diagonal part */
461   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
462   row  = row - diag;
463   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
464     if (garray[b->j[b->i[row]+l]] > diag) break;
465   }
466   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
467 
468   /* diagonal part */
469   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* right of diagonal part */
472   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatSetValues_MPIAIJ"
478 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
479 {
480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
481   PetscScalar    value;
482   PetscErrorCode ierr;
483   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
484   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
485   PetscBool      roworiented = aij->roworiented;
486 
487   /* Some Variables required in the macro */
488   Mat            A = aij->A;
489   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
490   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
491   MatScalar      *aa = a->a;
492   PetscBool      ignorezeroentries = a->ignorezeroentries;
493   Mat            B = aij->B;
494   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
495   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
496   MatScalar      *ba = b->a;
497 
498   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
499   PetscInt       nonew;
500   MatScalar      *ap1,*ap2;
501 
502   PetscFunctionBegin;
503   if (v) PetscValidScalarPointer(v,6);
504   for (i=0; i<m; i++) {
505     if (im[i] < 0) continue;
506 #if defined(PETSC_USE_DEBUG)
507     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
508 #endif
509     if (im[i] >= rstart && im[i] < rend) {
510       row      = im[i] - rstart;
511       lastcol1 = -1;
512       rp1      = aj + ai[row];
513       ap1      = aa + ai[row];
514       rmax1    = aimax[row];
515       nrow1    = ailen[row];
516       low1     = 0;
517       high1    = nrow1;
518       lastcol2 = -1;
519       rp2      = bj + bi[row];
520       ap2      = ba + bi[row];
521       rmax2    = bimax[row];
522       nrow2    = bilen[row];
523       low2     = 0;
524       high2    = nrow2;
525 
526       for (j=0; j<n; j++) {
527         if (v) {
528           if (roworiented) value = v[i*n+j];
529           else             value = v[i+j*m];
530         } else value = 0.0;
531         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
532         if (in[j] >= cstart && in[j] < cend) {
533           col = in[j] - cstart;
534           nonew = a->nonew;
535           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
536         } else if (in[j] < 0) continue;
537 #if defined(PETSC_USE_DEBUG)
538         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
539 #endif
540         else {
541           if (mat->was_assembled) {
542             if (!aij->colmap) {
543               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
544             }
545 #if defined (PETSC_USE_CTABLE)
546             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
547             col--;
548 #else
549             col = aij->colmap[in[j]] - 1;
550 #endif
551             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
552               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
553               col =  in[j];
554               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
555               B = aij->B;
556               b = (Mat_SeqAIJ*)B->data;
557               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
558               rp2      = bj + bi[row];
559               ap2      = ba + bi[row];
560               rmax2    = bimax[row];
561               nrow2    = bilen[row];
562               low2     = 0;
563               high2    = nrow2;
564               bm       = aij->B->rmap->n;
565               ba = b->a;
566             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
567           } else col = in[j];
568           nonew = b->nonew;
569           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
570         }
571       }
572     } else {
573       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
574       if (!aij->donotstash) {
575         mat->assembled = PETSC_FALSE;
576         if (roworiented) {
577           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
578         } else {
579           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
580         }
581       }
582     }
583   }
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatGetValues_MPIAIJ"
589 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
590 {
591   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
592   PetscErrorCode ierr;
593   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
594   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
595 
596   PetscFunctionBegin;
597   for (i=0; i<m; i++) {
598     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
599     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
600     if (idxm[i] >= rstart && idxm[i] < rend) {
601       row = idxm[i] - rstart;
602       for (j=0; j<n; j++) {
603         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
604         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
605         if (idxn[j] >= cstart && idxn[j] < cend) {
606           col = idxn[j] - cstart;
607           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
608         } else {
609           if (!aij->colmap) {
610             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
611           }
612 #if defined (PETSC_USE_CTABLE)
613           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
614           col --;
615 #else
616           col = aij->colmap[idxn[j]] - 1;
617 #endif
618           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
619           else {
620             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
621           }
622         }
623       }
624     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
633 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   PetscErrorCode ierr;
637   PetscInt       nstash,reallocs;
638   InsertMode     addv;
639 
640   PetscFunctionBegin;
641   if (aij->donotstash || mat->nooffprocentries) {
642     PetscFunctionReturn(0);
643   }
644 
645   /* make sure all processors are either in INSERTMODE or ADDMODE */
646   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
647   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
648   mat->insertmode = addv; /* in case this processor had no cache */
649 
650   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
651   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
652   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 #undef __FUNCT__
657 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
658 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
659 {
660   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
661   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
662   PetscErrorCode ierr;
663   PetscMPIInt    n;
664   PetscInt       i,j,rstart,ncols,flg;
665   PetscInt       *row,*col;
666   PetscBool      other_disassembled;
667   PetscScalar    *val;
668   InsertMode     addv = mat->insertmode;
669 
670   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
671 
672   PetscFunctionBegin;
673   if (!aij->donotstash && !mat->nooffprocentries) {
674     while (1) {
675       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
676       if (!flg) break;
677 
678       for (i=0; i<n;) {
679         /* Now identify the consecutive vals belonging to the same row */
680         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
681         if (j < n) ncols = j-i;
682         else       ncols = n-i;
683         /* Now assemble all these values with a single function call */
684         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
685         i = j;
686       }
687     }
688     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
689   }
690   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
691   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
692 
693   /* determine if any processor has disassembled; if so, we must
694      also disassemble ourselves, in order that we may reassemble. */
695   /*
696      if the nonzero structure of submatrix B cannot change then we know that
697      no processor disassembled, thus we can skip this step
698   */
699   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
700     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
701     if (mat->was_assembled && !other_disassembled) {
702       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
703     }
704   }
705   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
706     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
707   }
708   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
709   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
710   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
711   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
712 
713   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
714   aij->rowvalues = 0;
715 
716   /* used by MatAXPY() */
717   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
718   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
719 
720   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
721   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
728 {
729   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
730   PetscErrorCode ierr;
731 
732   PetscFunctionBegin;
733   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
734   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatZeroRows_MPIAIJ"
740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
741 {
742   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
743   PetscErrorCode    ierr;
744   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
745   PetscInt          i,*owners = A->rmap->range;
746   PetscInt          *nprocs,j,idx,nsends,row;
747   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
748   PetscInt          *rvalues,count,base,slen,*source;
749   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
750   MPI_Comm          comm = ((PetscObject)A)->comm;
751   MPI_Request       *send_waits,*recv_waits;
752   MPI_Status        recv_status,*send_status;
753   const PetscScalar *xx;
754   PetscScalar       *bb;
755 #if defined(PETSC_DEBUG)
756   PetscBool      found = PETSC_FALSE;
757 #endif
758 
759   PetscFunctionBegin;
760   /*  first count number of contributors to each processor */
761   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
762   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
763   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
764   j = 0;
765   for (i=0; i<N; i++) {
766     if (lastidx > (idx = rows[i])) j = 0;
767     lastidx = idx;
768     for (; j<size; j++) {
769       if (idx >= owners[j] && idx < owners[j+1]) {
770         nprocs[2*j]++;
771         nprocs[2*j+1] = 1;
772         owner[i] = j;
773 #if defined(PETSC_DEBUG)
774         found = PETSC_TRUE;
775 #endif
776         break;
777       }
778     }
779 #if defined(PETSC_DEBUG)
780     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
781     found = PETSC_FALSE;
782 #endif
783   }
784   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
785 
786   if (A->nooffproczerorows) {
787     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
788     nrecvs = nsends;
789     nmax   = N;
790   } else {
791     /* inform other processors of number of messages and max length*/
792     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
793   }
794 
795   /* post receives:   */
796   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
797   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
798   for (i=0; i<nrecvs; i++) {
799     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
800   }
801 
802   /* do sends:
803       1) starts[i] gives the starting index in svalues for stuff going to
804          the ith processor
805   */
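  /* Illustrative sketch with hypothetical counts: with size = 3 and per-process row counts
     nprocs[0] = 2, nprocs[2] = 0, nprocs[4] = 3, the prefix sums below give starts = {0, 2, 2},
     so the three rows destined for process 2 are packed into svalues[2], svalues[3], svalues[4] */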
806   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
807   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
808   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
809   starts[0] = 0;
810   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
811   for (i=0; i<N; i++) {
812     svalues[starts[owner[i]]++] = rows[i];
813   }
814 
815   starts[0] = 0;
816   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
817   count = 0;
818   for (i=0; i<size; i++) {
819     if (nprocs[2*i+1]) {
820       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
821     }
822   }
823   ierr = PetscFree(starts);CHKERRQ(ierr);
824 
825   base = owners[rank];
826 
827   /*  wait on receives */
828   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
829   count  = nrecvs; slen = 0;
830   while (count) {
831     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
832     /* unpack receives into our local space */
833     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
834     source[imdex]  = recv_status.MPI_SOURCE;
835     lens[imdex]    = n;
836     slen          += n;
837     count--;
838   }
839   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
840 
841   /* move the data into the send scatter */
842   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
843   count = 0;
844   for (i=0; i<nrecvs; i++) {
845     values = rvalues + i*nmax;
846     for (j=0; j<lens[i]; j++) {
847       lrows[count++] = values[j] - base;
848     }
849   }
850   ierr = PetscFree(rvalues);CHKERRQ(ierr);
851   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
852   ierr = PetscFree(owner);CHKERRQ(ierr);
853   ierr = PetscFree(nprocs);CHKERRQ(ierr);
854 
855   /* fix right hand side if needed */
856   if (x && b) {
857     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
858     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
859     for (i=0; i<slen; i++) {
860       bb[lrows[i]] = diag*xx[lrows[i]];
861     }
862     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
864   }
865   /*
866         Zero the required rows. If the "diagonal block" of the matrix
867      is square and the user wishes to set the diagonal we use separate
868      code so that MatSetValues() is not called for each diagonal entry, which would allocate
869      new memory, triggering lots of mallocs and slowing things down.
870 
871   */
872   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
873   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
875     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
876   } else if (diag != 0.0) {
877     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
878     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
879       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
880 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
881     }
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm = ((PetscObject)A)->comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool         found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   /*  first count number of contributors to each processor */
931   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
932   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
933   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
934   j = 0;
935   for (i=0; i<N; i++) {
936     if (lastidx > (idx = rows[i])) j = 0;
937     lastidx = idx;
938     for (; j<size; j++) {
939       if (idx >= owners[j] && idx < owners[j+1]) {
940         nprocs[2*j]++;
941         nprocs[2*j+1] = 1;
942         owner[i] = j;
943 #if defined(PETSC_DEBUG)
944         found = PETSC_TRUE;
945 #endif
946         break;
947       }
948     }
949 #if defined(PETSC_DEBUG)
950     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
951     found = PETSC_FALSE;
952 #endif
953   }
954   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
955 
956   /* inform other processors of number of messages and max length*/
957   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
958 
959   /* post receives:   */
960   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
961   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
962   for (i=0; i<nrecvs; i++) {
963     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
964   }
965 
966   /* do sends:
967       1) starts[i] gives the starting index in svalues for stuff going to
968          the ith processor
969   */
970   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
971   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
972   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
973   starts[0] = 0;
974   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
975   for (i=0; i<N; i++) {
976     svalues[starts[owner[i]]++] = rows[i];
977   }
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count  = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998     source[imdex]  = recv_status.MPI_SOURCE;
999     lens[imdex]    = n;
1000     slen          += n;
1001     count--;
1002   }
1003   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1004 
1005   /* move the data into the send scatter */
1006   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1007   count = 0;
1008   for (i=0; i<nrecvs; i++) {
1009     values = rvalues + i*nmax;
1010     for (j=0; j<lens[i]; j++) {
1011       lrows[count++] = values[j] - base;
1012     }
1013   }
1014   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1015   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1016   ierr = PetscFree(owner);CHKERRQ(ierr);
1017   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1018   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1019 
1020   /* zero diagonal part of matrix */
1021   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1022 
1023   /* handle off diagonal part of matrix */
1024   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1025   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1026   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1027   for (i=0; i<slen; i++) {
1028     bb[lrows[i]] = 1;
1029   }
1030   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1031   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1034   if (x) {
1035     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1038     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1039   }
1040   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1041 
1042   /* remove zeroed rows of off diagonal matrix */
1043   ii = aij->i;
1044   for (i=0; i<slen; i++) {
1045     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1046   }
1047 
1048   /* loop over all elements of the off-diagonal (off-process) part of the matrix, zeroing removed columns */
1049   if (aij->compressedrow.use) {
1050     m    = aij->compressedrow.nrows;
1051     ii   = aij->compressedrow.i;
1052     ridx = aij->compressedrow.rindex;
1053     for (i=0; i<m; i++) {
1054       n   = ii[i+1] - ii[i];
1055       aj  = aij->j + ii[i];
1056       aa  = aij->a + ii[i];
1057 
1058       for (j=0; j<n; j++) {
1059         if (PetscAbsScalar(mask[*aj])) {
1060           if (b) bb[*ridx] -= *aa*xx[*aj];
1061           *aa        = 0.0;
1062         }
1063         aa++;
1064         aj++;
1065       }
1066       ridx++;
1067     }
1068   } else { /* do not use compressed row format */
1069     m = l->B->rmap->n;
1070     for (i=0; i<m; i++) {
1071       n   = ii[i+1] - ii[i];
1072       aj  = aij->j + ii[i];
1073       aa  = aij->a + ii[i];
1074       for (j=0; j<n; j++) {
1075         if (PetscAbsScalar(mask[*aj])) {
1076           if (b) bb[i] -= *aa*xx[*aj];
1077           *aa    = 0.0;
1078         }
1079         aa++;
1080         aj++;
1081       }
1082     }
1083   }
1084   if (x) {
1085     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1086     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1087   }
1088   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1089   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1090   ierr = PetscFree(lrows);CHKERRQ(ierr);
1091 
1092   /* wait on sends */
1093   if (nsends) {
1094     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1095     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1096     ierr = PetscFree(send_status);CHKERRQ(ierr);
1097   }
1098   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1099   ierr = PetscFree(svalues);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMult_MPIAIJ"
1105 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109   PetscInt       nt;
1110 
1111   PetscFunctionBegin;
1112   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1113   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1114   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1116   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1123 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1124 {
1125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1126   PetscErrorCode ierr;
1127 
1128   PetscFunctionBegin;
1129   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1135 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1136 {
1137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1143   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1150 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1151 {
1152   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1153   PetscErrorCode ierr;
1154   PetscBool      merged;
1155 
1156   PetscFunctionBegin;
1157   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1158   /* do nondiagonal part */
1159   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1160   if (!merged) {
1161     /* send it on its way */
1162     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1163     /* do local part */
1164     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1165     /* receive remote parts: note this assumes the values are not actually */
1166     /* added into yy until the next line */
1167     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1168   } else {
1169     /* do local part */
1170     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1171     /* send it on its way */
1172     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1173     /* values actually were received in the Begin() but we need to call this nop */
1174     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1175   }
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 EXTERN_C_BEGIN
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1182 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1183 {
1184   MPI_Comm       comm;
1185   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1186   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1187   IS             Me,Notme;
1188   PetscErrorCode ierr;
1189   PetscInt       M,N,first,last,*notme,i;
1190   PetscMPIInt    size;
1191 
1192   PetscFunctionBegin;
1193   /* Easy test: the diagonal blocks must be transposes of each other */
1194   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1195   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1196   if (!*f) PetscFunctionReturn(0);
1197   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1199   if (size == 1) PetscFunctionReturn(0);
1200 
1201   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1202   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1203   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1204   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1205   for (i=0; i<first; i++) notme[i] = i;
1206   for (i=last; i<M; i++) notme[i-last+first] = i;
1207   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1208   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1209   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1210   Aoff = Aoffs[0];
1211   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1212   Boff = Boffs[0];
1213   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1214   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1215   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1216   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1217   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1218   ierr = PetscFree(notme);CHKERRQ(ierr);
1219   PetscFunctionReturn(0);
1220 }
1221 EXTERN_C_END
1222 
1223 #undef __FUNCT__
1224 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1225 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1226 {
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228   PetscErrorCode ierr;
1229 
1230   PetscFunctionBegin;
1231   /* do nondiagonal part */
1232   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1233   /* send it on its way */
1234   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1235   /* do local part */
1236   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1237   /* receive remote parts */
1238   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 /*
1243   This only works correctly for square matrices where the subblock A->A is the
1244    diagonal block
1245 */
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1248 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1249 {
1250   PetscErrorCode ierr;
1251   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1252 
1253   PetscFunctionBegin;
1254   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1255   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1256   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatScale_MPIAIJ"
1262 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1263 {
1264   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1265   PetscErrorCode ierr;
1266 
1267   PetscFunctionBegin;
1268   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1269   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatDestroy_MPIAIJ"
1275 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1276 {
1277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1278   PetscErrorCode ierr;
1279 
1280   PetscFunctionBegin;
1281 #if defined(PETSC_USE_LOG)
1282   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1283 #endif
1284   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1285   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1286   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1287   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1288 #if defined (PETSC_USE_CTABLE)
1289   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1290 #else
1291   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1292 #endif
1293   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1294   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1295   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1296   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1297   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1298   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1299 
1300   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1314 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1315 {
1316   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1317   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1318   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1319   PetscErrorCode    ierr;
1320   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1321   int               fd;
1322   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1323   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1324   PetscScalar       *column_values;
1325   PetscInt          message_count,flowcontrolcount;
1326   FILE              *file;
1327 
1328   PetscFunctionBegin;
1329   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1330   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1331   nz   = A->nz + B->nz;
1332   if (!rank) {
1333     header[0] = MAT_FILE_CLASSID;
1334     header[1] = mat->rmap->N;
1335     header[2] = mat->cmap->N;
1336     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1337     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1338     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1339     /* get largest number of rows any processor has */
1340     rlen = mat->rmap->n;
1341     range = mat->rmap->range;
1342     for (i=1; i<size; i++) {
1343       rlen = PetscMax(rlen,range[i+1] - range[i]);
1344     }
1345   } else {
1346     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347     rlen = mat->rmap->n;
1348   }
1349 
1350   /* load up the local row counts */
1351   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1352   for (i=0; i<mat->rmap->n; i++) {
1353     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1354   }
1355 
1356   /* store the row lengths to the file */
1357   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1358   if (!rank) {
1359     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1360     for (i=1; i<size; i++) {
1361       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1362       rlen = range[i+1] - range[i];
1363       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1364       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1365     }
1366     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1367   } else {
1368     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1369     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1370     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1371   }
1372   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1373 
1374   /* load up the local column indices */
1375   nzmax = nz; /* process 0 needs as much space as the largest processor needs; others only need room for their own nonzeros */
1376   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1377   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1378   cnt  = 0;
1379   for (i=0; i<mat->rmap->n; i++) {
1380     for (j=B->i[i]; j<B->i[i+1]; j++) {
1381       if ((col = garray[B->j[j]]) > cstart) break;
1382       column_indices[cnt++] = col;
1383     }
1384     for (k=A->i[i]; k<A->i[i+1]; k++) {
1385       column_indices[cnt++] = A->j[k] + cstart;
1386     }
1387     for (; j<B->i[i+1]; j++) {
1388       column_indices[cnt++] = garray[B->j[j]];
1389     }
1390   }
1391   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1392 
1393   /* store the column indices to the file */
1394   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1395   if (!rank) {
1396     MPI_Status status;
1397     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1398     for (i=1; i<size; i++) {
1399       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1400       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1401       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1402       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1403       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1404     }
1405     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1406   } else {
1407     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1408     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1409     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1410     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1411   }
1412   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1413 
1414   /* load up the local column values */
1415   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1416   cnt  = 0;
1417   for (i=0; i<mat->rmap->n; i++) {
1418     for (j=B->i[i]; j<B->i[i+1]; j++) {
1419       if (garray[B->j[j]] > cstart) break;
1420       column_values[cnt++] = B->a[j];
1421     }
1422     for (k=A->i[i]; k<A->i[i+1]; k++) {
1423       column_values[cnt++] = A->a[k];
1424     }
1425     for (; j<B->i[i+1]; j++) {
1426       column_values[cnt++] = B->a[j];
1427     }
1428   }
1429   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1430 
1431   /* store the column values to the file */
1432   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1433   if (!rank) {
1434     MPI_Status status;
1435     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1436     for (i=1; i<size; i++) {
1437       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1438       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1439       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1440       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1441       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1442     }
1443     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1444   } else {
1445     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1446     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1447     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1448     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1449   }
1450   ierr = PetscFree(column_values);CHKERRQ(ierr);
1451 
1452   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1453   if (file) {
1454     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1455   }
1456   PetscFunctionReturn(0);
1457 }
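
/*
   A sketch of how this writer is normally reached (viewer variable and file name are
   hypothetical, not taken from this file).  The routine above emits, in order: the header
   [MAT_FILE_CLASSID, M, N, global nz], then every row length, then all global column
   indices, then all values, with rank 0 receiving and writing the other ranks' data under
   flow control.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);   /* for a parallel MATMPIAIJ this ends up in MatView_MPIAIJ_Binary() */
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/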
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1461 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1462 {
1463   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1464   PetscErrorCode    ierr;
1465   PetscMPIInt       rank = aij->rank,size = aij->size;
1466   PetscBool         isdraw,iascii,isbinary;
1467   PetscViewer       sviewer;
1468   PetscViewerFormat format;
1469 
1470   PetscFunctionBegin;
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1473   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1474   if (iascii) {
1475     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1476     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1477       MatInfo    info;
1478       PetscBool  inodes;
1479 
1480       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1481       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1482       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1483       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1484       if (!inodes) {
1485         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1486                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1487       } else {
1488         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1489                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1490       }
1491       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1492       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1493       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1494       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1495       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1497       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1498       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1499       PetscFunctionReturn(0);
1500     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1501       PetscInt   inodecount,inodelimit,*inodes;
1502       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1503       if (inodes) {
1504         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1505       } else {
1506         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1507       }
1508       PetscFunctionReturn(0);
1509     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1510       PetscFunctionReturn(0);
1511     }
1512   } else if (isbinary) {
1513     if (size == 1) {
1514       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1515       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1516     } else {
1517       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1518     }
1519     PetscFunctionReturn(0);
1520   } else if (isdraw) {
1521     PetscDraw  draw;
1522     PetscBool  isnull;
1523     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1524     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1525   }
1526 
1527   if (size == 1) {
1528     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1529     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1530   } else {
1531     /* assemble the entire matrix onto first processor. */
1532     Mat         A;
1533     Mat_SeqAIJ  *Aloc;
1534     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1535     MatScalar   *a;
1536 
1537     if (mat->rmap->N > 1024) {
1538       PetscBool  flg = PETSC_FALSE;
1539 
1540       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1541       if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542     }
1543 
1544     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1545     if (!rank) {
1546       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1547     } else {
1548       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1549     }
1550     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1551     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1552     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1553     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1554     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1555 
1556     /* copy over the A part */
1557     Aloc = (Mat_SeqAIJ*)aij->A->data;
1558     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1559     row = mat->rmap->rstart;
1560     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1561     for (i=0; i<m; i++) {
1562       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1563       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1564     }
1565     aj = Aloc->j;
1566     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1567 
1568     /* copy over the B part */
1569     Aloc = (Mat_SeqAIJ*)aij->B->data;
1570     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1571     row  = mat->rmap->rstart;
1572     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1573     ct   = cols;
1574     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1575     for (i=0; i<m; i++) {
1576       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1577       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1578     }
1579     ierr = PetscFree(ct);CHKERRQ(ierr);
1580     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1581     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     /*
1583        Everyone has to call to draw the matrix since the graphics waits are
1584        synchronized across all processors that share the PetscDraw object
1585     */
1586     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1587     if (!rank) {
1588       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1589       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
1590       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1591       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1592     }
1593     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1594     ierr = MatDestroy(&A);CHKERRQ(ierr);
1595   }
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatView_MPIAIJ"
1601 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1602 {
1603   PetscErrorCode ierr;
1604   PetscBool      iascii,isdraw,issocket,isbinary;
1605 
1606   PetscFunctionBegin;
1607   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1609   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1611   if (iascii || isdraw || isbinary || issocket) {
1612     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1613   }
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatSOR_MPIAIJ"
1619 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1620 {
1621   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1622   PetscErrorCode ierr;
1623   Vec            bb1 = 0;
1624   PetscBool      hasop;
1625 
1626   PetscFunctionBegin;
1627   if (flag == SOR_APPLY_UPPER) {
1628     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1629     PetscFunctionReturn(0);
1630   }
1631 
1632   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1633     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1634   }
1635 
1636   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1637     if (flag & SOR_ZERO_INITIAL_GUESS) {
1638       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1639       its--;
1640     }
1641 
1642     while (its--) {
1643       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 
1646       /* update rhs: bb1 = bb - B*x */
1647       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1648       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1649 
1650       /* local sweep */
1651       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1652     }
1653   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1654     if (flag & SOR_ZERO_INITIAL_GUESS) {
1655       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1656       its--;
1657     }
1658     while (its--) {
1659       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661 
1662       /* update rhs: bb1 = bb - B*x */
1663       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1664       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1665 
1666       /* local sweep */
1667       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1668     }
1669   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1670     if (flag & SOR_ZERO_INITIAL_GUESS) {
1671       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1672       its--;
1673     }
1674     while (its--) {
1675       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1676       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677 
1678       /* update rhs: bb1 = bb - B*x */
1679       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1680       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1681 
1682       /* local sweep */
1683       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1684     }
1685   } else if (flag & SOR_EISENSTAT) {
1686     Vec         xx1;
1687 
1688     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1689     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1690 
1691     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1692     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1693     if (!mat->diag) {
1694       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1695       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1696     }
1697     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1698     if (hasop) {
1699       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1700     } else {
1701       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1702     }
1703     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1704 
1705     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1706 
1707     /* local sweep */
1708     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1709     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1710     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1711   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1712 
1713   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1714   PetscFunctionReturn(0);
1715 }
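
/*
   The sweeps above realize a processor-local SOR: the coupling to off-processor unknowns is
   frozen on the right-hand side, bb1 = bb - B*x_ghost (lvec is scaled by -1 and fed to the
   multadd), and a sequential SOR sweep is then applied to the diagonal block A.  A minimal
   call sketch, assuming an assembled MPIAIJ matrix A and conforming vectors b and x (names
   hypothetical):

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/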
1716 
1717 #undef __FUNCT__
1718 #define __FUNCT__ "MatPermute_MPIAIJ"
1719 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1720 {
1721   Mat            aA,aB,Aperm;
1722   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1723   PetscScalar    *aa,*ba;
1724   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1725   PetscSF        rowsf,sf;
1726   IS             parcolp = PETSC_NULL;
1727   PetscBool      done;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1732   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1733   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1734   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1735 
1736   /* Invert row permutation to find out where my rows should go */
1737   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1738   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1739   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1740   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1741   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1742   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1743 
1744   /* Invert column permutation to find out where my columns should go */
1745   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1749   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1750   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1751   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1752 
1753   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1754   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1755   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1756 
1757   /* Find out where my gcols should go */
1758   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1759   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1760   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1761   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1762   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1763   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1764   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1765   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1766 
1767   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1768   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1769   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1770   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1771   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1772   for (i=0; i<m; i++) {
1773     PetscInt row = rdest[i],rowner;
1774     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1775     for (j=ai[i]; j<ai[i+1]; j++) {
1776       PetscInt cowner,col = cdest[aj[j]];
1777       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1778       if (rowner == cowner) dnnz[i]++;
1779       else onnz[i]++;
1780     }
1781     for (j=bi[i]; j<bi[i+1]; j++) {
1782       PetscInt cowner,col = gcdest[bj[j]];
1783       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1784       if (rowner == cowner) dnnz[i]++;
1785       else onnz[i]++;
1786     }
1787   }
1788   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1789   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1790   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1791   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1792   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1793   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1794   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1795 
1796   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1797   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1798   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1799   for (i=0; i<m; i++) {
1800     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1801     PetscInt rowlen;
1802     rowlen = ai[i+1] - ai[i];
1803     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1804     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1805     rowlen = bi[i+1] - bi[i];
1806     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1807     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1808   }
1809   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1810   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1811   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1812   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1813   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1814   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1815   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1816   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1817   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1818   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1819   *B = Aperm;
1820   PetscFunctionReturn(0);
1821 }
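
/*
   Minimal usage sketch for the routine above (index set and matrix names are hypothetical):
   the implementation inverts the requested row and column orderings with PetscSF reductions,
   preallocates the target matrix from the counted diagonal/off-diagonal nonzeros per
   destination row, and then moves the values with MatSetValues().

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);   /* rowp, colp are the desired orderings (IS) */
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/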
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1826 {
1827   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1828   Mat            A = mat->A,B = mat->B;
1829   PetscErrorCode ierr;
1830   PetscReal      isend[5],irecv[5];
1831 
1832   PetscFunctionBegin;
1833   info->block_size     = 1.0;
1834   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1835   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1836   isend[3] = info->memory;  isend[4] = info->mallocs;
1837   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1838   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1839   isend[3] += info->memory;  isend[4] += info->mallocs;
1840   if (flag == MAT_LOCAL) {
1841     info->nz_used      = isend[0];
1842     info->nz_allocated = isend[1];
1843     info->nz_unneeded  = isend[2];
1844     info->memory       = isend[3];
1845     info->mallocs      = isend[4];
1846   } else if (flag == MAT_GLOBAL_MAX) {
1847     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1848     info->nz_used      = irecv[0];
1849     info->nz_allocated = irecv[1];
1850     info->nz_unneeded  = irecv[2];
1851     info->memory       = irecv[3];
1852     info->mallocs      = irecv[4];
1853   } else if (flag == MAT_GLOBAL_SUM) {
1854     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1855     info->nz_used      = irecv[0];
1856     info->nz_allocated = irecv[1];
1857     info->nz_unneeded  = irecv[2];
1858     info->memory       = irecv[3];
1859     info->mallocs      = irecv[4];
1860   }
1861   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1862   info->fill_ratio_needed = 0;
1863   info->factor_mallocs    = 0;
1864   PetscFunctionReturn(0);
1865 }
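
/*
   The local statistics above are the sums over the diagonal block (A) and the off-diagonal
   block (B); the MAT_GLOBAL_MAX / MAT_GLOBAL_SUM variants reduce the packed 5-entry array
   across the communicator.  A small usage sketch (names hypothetical):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g  nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/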
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatSetOption_MPIAIJ"
1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1870 {
1871   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1872   PetscErrorCode ierr;
1873 
1874   PetscFunctionBegin;
1875   switch (op) {
1876   case MAT_NEW_NONZERO_LOCATIONS:
1877   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1878   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1879   case MAT_KEEP_NONZERO_PATTERN:
1880   case MAT_NEW_NONZERO_LOCATION_ERR:
1881   case MAT_USE_INODES:
1882   case MAT_IGNORE_ZERO_ENTRIES:
1883     MatCheckPreallocated(A,1);
1884     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1885     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1886     break;
1887   case MAT_ROW_ORIENTED:
1888     a->roworiented = flg;
1889     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1890     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1891     break;
1892   case MAT_NEW_DIAGONALS:
1893     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1894     break;
1895   case MAT_IGNORE_OFF_PROC_ENTRIES:
1896     a->donotstash = flg;
1897     break;
1898   case MAT_SPD:
1899     A->spd_set                         = PETSC_TRUE;
1900     A->spd                             = flg;
1901     if (flg) {
1902       A->symmetric                     = PETSC_TRUE;
1903       A->structurally_symmetric        = PETSC_TRUE;
1904       A->symmetric_set                 = PETSC_TRUE;
1905       A->structurally_symmetric_set    = PETSC_TRUE;
1906     }
1907     break;
1908   case MAT_SYMMETRIC:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   case MAT_STRUCTURALLY_SYMMETRIC:
1912     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1913     break;
1914   case MAT_HERMITIAN:
1915     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1916     break;
1917   case MAT_SYMMETRY_ETERNAL:
1918     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1919     break;
1920   default:
1921     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1922   }
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatGetRow_MPIAIJ"
1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1929 {
1930   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1931   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1932   PetscErrorCode ierr;
1933   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1934   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1935   PetscInt       *cmap,*idx_p;
1936 
1937   PetscFunctionBegin;
1938   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1939   mat->getrowactive = PETSC_TRUE;
1940 
1941   if (!mat->rowvalues && (idx || v)) {
1942     /*
1943         allocate enough space to hold information from the longest row.
1944     */
1945     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1946     PetscInt   max = 1,tmp;
1947     for (i=0; i<matin->rmap->n; i++) {
1948       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1949       if (max < tmp) { max = tmp; }
1950     }
1951     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1952   }
1953 
1954   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1955   lrow = row - rstart;
1956 
1957   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1958   if (!v)   {pvA = 0; pvB = 0;}
1959   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1960   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1961   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1962   nztot = nzA + nzB;
1963 
1964   cmap  = mat->garray;
1965   if (v  || idx) {
1966     if (nztot) {
1967       /* Sort by increasing column numbers, assuming A and B already sorted */
1968       PetscInt imark = -1;
1969       if (v) {
1970         *v = v_p = mat->rowvalues;
1971         for (i=0; i<nzB; i++) {
1972           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1973           else break;
1974         }
1975         imark = i;
1976         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1977         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1978       }
1979       if (idx) {
1980         *idx = idx_p = mat->rowindices;
1981         if (imark > -1) {
1982           for (i=0; i<imark; i++) {
1983             idx_p[i] = cmap[cworkB[i]];
1984           }
1985         } else {
1986           for (i=0; i<nzB; i++) {
1987             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1988             else break;
1989           }
1990           imark = i;
1991         }
1992         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1993         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1994       }
1995     } else {
1996       if (idx) *idx = 0;
1997       if (v)   *v   = 0;
1998     }
1999   }
2000   *nz = nztot;
2001   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2002   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2009 {
2010   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2011 
2012   PetscFunctionBegin;
2013   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2014   aij->getrowactive = PETSC_FALSE;
2015   PetscFunctionReturn(0);
2016 }
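
/*
   MatGetRow_MPIAIJ() above merges the requested row of the diagonal block with the row of the
   off-diagonal block (translated to global indices through garray) into one column-sorted row;
   only locally owned rows may be requested and every MatGetRow() must be matched by a
   MatRestoreRow().  A minimal sketch, assuming an assembled MPIAIJ matrix A:

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... examine the global column indices and values of this row ... */
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/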
2017 
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatNorm_MPIAIJ"
2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2021 {
2022   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2023   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2024   PetscErrorCode ierr;
2025   PetscInt       i,j,cstart = mat->cmap->rstart;
2026   PetscReal      sum = 0.0;
2027   MatScalar      *v;
2028 
2029   PetscFunctionBegin;
2030   if (aij->size == 1) {
2031     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2032   } else {
2033     if (type == NORM_FROBENIUS) {
2034       v = amat->a;
2035       for (i=0; i<amat->nz; i++) {
2036         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2037       }
2038       v = bmat->a;
2039       for (i=0; i<bmat->nz; i++) {
2040         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2041       }
2042       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2043       *norm = PetscSqrtReal(*norm);
2044     } else if (type == NORM_1) { /* max column norm */
2045       PetscReal *tmp,*tmp2;
2046       PetscInt  *jj,*garray = aij->garray;
2047       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2048       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2049       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2050       *norm = 0.0;
2051       v = amat->a; jj = amat->j;
2052       for (j=0; j<amat->nz; j++) {
2053         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2054       }
2055       v = bmat->a; jj = bmat->j;
2056       for (j=0; j<bmat->nz; j++) {
2057         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2058       }
2059       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2060       for (j=0; j<mat->cmap->N; j++) {
2061         if (tmp2[j] > *norm) *norm = tmp2[j];
2062       }
2063       ierr = PetscFree(tmp);CHKERRQ(ierr);
2064       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2065     } else if (type == NORM_INFINITY) { /* max row norm */
2066       PetscReal ntemp = 0.0;
2067       for (j=0; j<aij->A->rmap->n; j++) {
2068         v = amat->a + amat->i[j];
2069         sum = 0.0;
2070         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2071           sum += PetscAbsScalar(*v); v++;
2072         }
2073         v = bmat->a + bmat->i[j];
2074         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2075           sum += PetscAbsScalar(*v); v++;
2076         }
2077         if (sum > ntemp) ntemp = sum;
2078       }
2079       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2080     } else {
2081       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2082     }
2083   }
2084   PetscFunctionReturn(0);
2085 }
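
/*
   Summary of the reductions above: NORM_FROBENIUS takes the square root of the global sum of
   |a_ij|^2, NORM_1 reduces the per-column absolute sums and takes their maximum, NORM_INFINITY
   reduces the maximum absolute row sum; the 2-norm is not supported in parallel.  Minimal call
   sketch:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/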
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatTranspose_MPIAIJ"
2089 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2090 {
2091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2092   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2093   PetscErrorCode ierr;
2094   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2095   PetscInt       cstart=A->cmap->rstart,ncol;
2096   Mat            B;
2097   MatScalar      *array;
2098 
2099   PetscFunctionBegin;
2100   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2101 
2102   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2103   ai = Aloc->i; aj = Aloc->j;
2104   bi = Bloc->i; bj = Bloc->j;
2105   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2106     PetscInt *d_nnz,*g_nnz,*o_nnz;
2107     PetscSFNode *oloc;
2108     PETSC_UNUSED PetscSF sf;
2109 
2110     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2111     /* compute d_nnz for preallocation */
2112     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2113     for (i=0; i<ai[ma]; i++) {
2114       d_nnz[aj[i]] ++;
2115       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2116     }
2117     /* compute local off-diagonal contributions */
2118     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2119     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2120     /* map those to global */
2121     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2122     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2123     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2124     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2125     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2126     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2127     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2128 
2129     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2130     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2131     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2132     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2133     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2134     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2135   } else {
2136     B = *matout;
2137     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2138     for (i=0; i<ai[ma]; i++) {
2139       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2140     }
2141   }
2142 
2143   /* copy over the A part */
2144   array = Aloc->a;
2145   row = A->rmap->rstart;
2146   for (i=0; i<ma; i++) {
2147     ncol = ai[i+1]-ai[i];
2148     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2149     row++; array += ncol; aj += ncol;
2150   }
2151   aj = Aloc->j;
2152   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2153 
2154   /* copy over the B part */
2155   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2156   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2157   array = Bloc->a;
2158   row = A->rmap->rstart;
2159   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2160   cols_tmp = cols;
2161   for (i=0; i<mb; i++) {
2162     ncol = bi[i+1]-bi[i];
2163     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2164     row++; array += ncol; cols_tmp += ncol;
2165   }
2166   ierr = PetscFree(cols);CHKERRQ(ierr);
2167 
2168   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2169   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2170   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2171     *matout = B;
2172   } else {
2173     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2174   }
2175   PetscFunctionReturn(0);
2176 }
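
/*
   Usage sketch for the routine above (matrix names hypothetical): the transpose is
   preallocated from per-column counts (d_nnz from the diagonal block, o_nnz obtained by
   reducing the off-diagonal column counts over a PetscSF) and filled by inserting each local
   row as a column.  MAT_REUSE_MATRIX with *matout == A performs the operation in place for
   square matrices.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/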
2177 
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2180 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2181 {
2182   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2183   Mat            a = aij->A,b = aij->B;
2184   PetscErrorCode ierr;
2185   PetscInt       s1,s2,s3;
2186 
2187   PetscFunctionBegin;
2188   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2189   if (rr) {
2190     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2191     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2192     /* Overlap communication with computation. */
2193     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2194   }
2195   if (ll) {
2196     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2197     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2198     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2199   }
2200   /* scale  the diagonal block */
2201   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2202 
2203   if (rr) {
2204     /* Do a scatter end and then right scale the off-diagonal block */
2205     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2206     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2207   }
2208   PetscFunctionReturn(0);
2209 }
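
/*
   The routine above computes diag(ll) * mat * diag(rr).  Right scaling of the off-diagonal
   block needs the ghost entries of rr, so the scatter into lvec is started first, the diagonal
   block is scaled while the messages are in flight, and the off-diagonal block is right-scaled
   after VecScatterEnd().  Minimal call sketch (l and r are hypothetical vectors with the
   matrix' row and column layouts respectively; either may be PETSC_NULL):

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/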
2210 
2211 #undef __FUNCT__
2212 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2213 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2214 {
2215   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2216   PetscErrorCode ierr;
2217 
2218   PetscFunctionBegin;
2219   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatEqual_MPIAIJ"
2225 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2226 {
2227   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2228   Mat            a,b,c,d;
2229   PetscBool      flg;
2230   PetscErrorCode ierr;
2231 
2232   PetscFunctionBegin;
2233   a = matA->A; b = matA->B;
2234   c = matB->A; d = matB->B;
2235 
2236   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2237   if (flg) {
2238     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2239   }
2240   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatCopy_MPIAIJ"
2246 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2247 {
2248   PetscErrorCode ierr;
2249   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2250   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2251 
2252   PetscFunctionBegin;
2253   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2254   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2255     /* Because of the column compression in the off-processor part of the matrix a->B,
2256        the number of columns in a->B and b->B may differ, hence we cannot call MatCopy()
2257        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2258        could be provided by first uncompressing the a->B matrices and then copying the
2259        submatrices */
2260     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2261   } else {
2262     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2263     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2264   }
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatSetUp_MPIAIJ"
2270 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2271 {
2272   PetscErrorCode ierr;
2273 
2274   PetscFunctionBegin;
2275   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2281 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2282 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2283 {
2284   PetscInt          i,m=Y->rmap->N;
2285   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2286   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2287   const PetscInt    *xi = x->i,*yi = y->i;
2288 
2289   PetscFunctionBegin;
2290   /* Set the number of nonzeros in the new matrix */
2291   for (i=0; i<m; i++) {
2292     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2293     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2294     nnz[i] = 0;
2295     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2296       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2297       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2298       nnz[i]++;
2299     }
2300     for (; k<nzy; k++) nnz[i]++;
2301   }
2302   PetscFunctionReturn(0);
2303 }
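
/*
   The loop above counts, for each row, the size of the union of X's and Y's sorted column
   lists, compared in global indices through the supplied local-to-global maps.  For example,
   if one row has xltog[xj] = {1,4,7} and yltog[yj] = {0,4,9}, the merged union is {0,1,4,7,9}
   and nnz[i] = 5.  This becomes the preallocation of the off-diagonal block when
   MatAXPY_MPIAIJ() below has to build a fresh matrix.
*/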
2304 
2305 #undef __FUNCT__
2306 #define __FUNCT__ "MatAXPY_MPIAIJ"
2307 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2308 {
2309   PetscErrorCode ierr;
2310   PetscInt       i;
2311   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2312   PetscBLASInt   bnz,one=1;
2313   Mat_SeqAIJ     *x,*y;
2314 
2315   PetscFunctionBegin;
2316   if (str == SAME_NONZERO_PATTERN) {
2317     PetscScalar alpha = a;
2318     x    = (Mat_SeqAIJ *)xx->A->data;
2319     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2320     y    = (Mat_SeqAIJ *)yy->A->data;
2321     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2322     x    = (Mat_SeqAIJ *)xx->B->data;
2323     y    = (Mat_SeqAIJ *)yy->B->data;
2324     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2325     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2326   } else if (str == SUBSET_NONZERO_PATTERN) {
2327     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2328 
2329     x = (Mat_SeqAIJ *)xx->B->data;
2330     y = (Mat_SeqAIJ *)yy->B->data;
2331     if (y->xtoy && y->XtoY != xx->B) {
2332       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2333       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2334     }
2335     if (!y->xtoy) { /* get xtoy */
2336       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2337       y->XtoY = xx->B;
2338       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2339     }
2340     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2341   } else {
2342     Mat B;
2343     PetscInt *nnz_d,*nnz_o;
2344     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2345     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2346     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2347     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2348     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2349     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2350     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2351     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2352     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2353     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2354     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2355     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2356     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2357     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2358   }
2359   PetscFunctionReturn(0);
2360 }
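
/*
   With SAME_NONZERO_PATTERN the update above reduces to two BLAS axpy calls on the stored
   value arrays (y->a += a*x->a for the diagonal and off-diagonal blocks); SUBSET_NONZERO_PATTERN
   reuses a cached x-to-y index map; otherwise a freshly preallocated matrix replaces Y.
   Minimal call sketch (alpha hypothetical):

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);   /* Y <- Y + alpha*X */
*/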
2361 
2362 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatConjugate_MPIAIJ"
2366 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2367 {
2368 #if defined(PETSC_USE_COMPLEX)
2369   PetscErrorCode ierr;
2370   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2371 
2372   PetscFunctionBegin;
2373   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2374   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2375 #else
2376   PetscFunctionBegin;
2377 #endif
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatRealPart_MPIAIJ"
2383 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2384 {
2385   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2390   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2396 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2397 {
2398   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2403   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 #if defined(PETSC_HAVE_PBGL)
2408 
2409 #include <boost/parallel/mpi/bsp_process_group.hpp>
2410 #include <boost/graph/distributed/ilu_default_graph.hpp>
2411 #include <boost/graph/distributed/ilu_0_block.hpp>
2412 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2413 #include <boost/graph/distributed/petsc/interface.hpp>
2414 #include <boost/multi_array.hpp>
2415 #include <boost/parallel/distributed_property_map.hpp>
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2419 /*
2420   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2421 */
2422 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2423 {
2424   namespace petsc = boost::distributed::petsc;
2425 
2426   namespace graph_dist = boost::graph::distributed;
2427   using boost::graph::distributed::ilu_default::process_group_type;
2428   using boost::graph::ilu_permuted;
2429 
2430   PetscBool       row_identity, col_identity;
2431   PetscContainer  c;
2432   PetscInt        m, n, M, N;
2433   PetscErrorCode  ierr;
2434 
2435   PetscFunctionBegin;
2436   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2437   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2438   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2439   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2440 
2441   process_group_type pg;
2442   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2443   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2444   lgraph_type&   level_graph = *lgraph_p;
2445   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2446 
2447   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2448   ilu_permuted(level_graph);
2449 
2450   /* put together the new matrix */
2451   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2452   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2453   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2454   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2455   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2456   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2457   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2458   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2459 
2460   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2461   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2462   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2463   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2469 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2470 {
2471   PetscFunctionBegin;
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatSolve_MPIAIJ"
2477 /*
2478   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2479 */
2480 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2481 {
2482   namespace graph_dist = boost::graph::distributed;
2483 
2484   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2485   lgraph_type*   lgraph_p;
2486   PetscContainer c;
2487   PetscErrorCode ierr;
2488 
2489   PetscFunctionBegin;
2490   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2491   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2492   ierr = VecCopy(b, x);CHKERRQ(ierr);
2493 
2494   PetscScalar* array_x;
2495   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2496   PetscInt sx;
2497   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2498 
2499   PetscScalar* array_b;
2500   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2501   PetscInt sb;
2502   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2503 
2504   lgraph_type&   level_graph = *lgraph_p;
2505   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2506 
2507   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2508   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2509                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2510 
2511   typedef boost::iterator_property_map<array_ref_type::iterator,
2512                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2513   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2514                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2515 
2516   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2517   PetscFunctionReturn(0);
2518 }
2519 #endif
2520 
2521 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2522   PetscInt       nzlocal,nsends,nrecvs;
2523   PetscMPIInt    *send_rank,*recv_rank;
2524   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2525   PetscScalar    *sbuf_a,**rbuf_a;
2526   PetscErrorCode (*Destroy)(Mat);
2527 } Mat_Redundant;
2528 
2529 #undef __FUNCT__
2530 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2531 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2532 {
2533   PetscErrorCode       ierr;
2534   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2535   PetscInt             i;
2536 
2537   PetscFunctionBegin;
2538   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2539   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2540   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2541   for (i=0; i<redund->nrecvs; i++) {
2542     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2543     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2544   }
2545   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2546   ierr = PetscFree(redund);CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 #undef __FUNCT__
2551 #define __FUNCT__ "MatDestroy_MatRedundant"
2552 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2553 {
2554   PetscErrorCode  ierr;
2555   PetscContainer  container;
2556   Mat_Redundant   *redund=PETSC_NULL;
2557 
2558   PetscFunctionBegin;
2559   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2560   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2561   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2562   A->ops->destroy = redund->Destroy;
2563   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2564   if (A->ops->destroy) {
2565     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2566   }
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 #undef __FUNCT__
2571 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2572 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2573 {
2574   PetscMPIInt    rank,size;
2575   MPI_Comm       comm=((PetscObject)mat)->comm;
2576   PetscErrorCode ierr;
2577   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2578   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2579   PetscInt       *rowrange=mat->rmap->range;
2580   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2581   Mat            A=aij->A,B=aij->B,C=*matredundant;
2582   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2583   PetscScalar    *sbuf_a;
2584   PetscInt       nzlocal=a->nz+b->nz;
2585   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2586   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2587   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2588   MatScalar      *aworkA,*aworkB;
2589   PetscScalar    *vals;
2590   PetscMPIInt    tag1,tag2,tag3,imdex;
2591   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2592                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2593   MPI_Status     recv_status,*send_status;
2594   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2595   PetscInt       **rbuf_j=PETSC_NULL;
2596   PetscScalar    **rbuf_a=PETSC_NULL;
2597   Mat_Redundant  *redund=PETSC_NULL;
2598   PetscContainer container;
2599 
2600   PetscFunctionBegin;
2601   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2602   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2603 
2604   if (reuse == MAT_REUSE_MATRIX) {
2605     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2606     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2607     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2608     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2609     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2610     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2611     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2612     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2613 
2614     nsends    = redund->nsends;
2615     nrecvs    = redund->nrecvs;
2616     send_rank = redund->send_rank;
2617     recv_rank = redund->recv_rank;
2618     sbuf_nz   = redund->sbuf_nz;
2619     rbuf_nz   = redund->rbuf_nz;
2620     sbuf_j    = redund->sbuf_j;
2621     sbuf_a    = redund->sbuf_a;
2622     rbuf_j    = redund->rbuf_j;
2623     rbuf_a    = redund->rbuf_a;
2624   }
2625 
2626   if (reuse == MAT_INITIAL_MATRIX) {
2627     PetscMPIInt  subrank,subsize;
2628     PetscInt     nleftover,np_subcomm;
2629     /* get the destination processors' id send_rank, nsends and nrecvs */
2630     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2631     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2632     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2633     np_subcomm = size/nsubcomm;
2634     nleftover  = size - nsubcomm*np_subcomm;
2635     nsends = 0; nrecvs = 0;
2636     for (i=0; i<size; i++) { /* i=rank*/
2637       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2638         send_rank[nsends] = i; nsends++;
2639         recv_rank[nrecvs++] = i;
2640       }
2641     }
2642     if (rank >= size - nleftover) {/* this proc is a leftover processor */
2643       i = size-nleftover-1;
2644       j = 0;
2645       while (j < nsubcomm - nleftover) {
2646         send_rank[nsends++] = i;
2647         i--; j++;
2648       }
2649     }
2650 
2651     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2652       for (i=0; i<nleftover; i++) {
2653         recv_rank[nrecvs++] = size-nleftover+i;
2654       }
2655     }
2656 
2657     /* allocate sbuf_j, sbuf_a */
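         /* sbuf_j holds (rend-rstart+1) row pointers followed by the nzlocal column indices, plus one extra entry of slack */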
2658     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2659     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2660     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2661   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2662 
2663   /* copy mat's local entries into the buffers */
2664   if (reuse == MAT_INITIAL_MATRIX) {
2665     rownz_max = 0;
2666     rptr = sbuf_j;
2667     cols = sbuf_j + rend-rstart + 1;
2668     vals = sbuf_a;
2669     rptr[0] = 0;
2670     for (i=0; i<rend-rstart; i++) {
2671       row = i + rstart;
2672       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2673       ncols  = nzA + nzB;
2674       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2675       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2676       /* load the column indices for this row into cols */
2677       lwrite = 0;
2678       for (l=0; l<nzB; l++) {
2679         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2680           vals[lwrite]   = aworkB[l];
2681           cols[lwrite++] = ctmp;
2682         }
2683       }
2684       for (l=0; l<nzA; l++) {
2685         vals[lwrite]   = aworkA[l];
2686         cols[lwrite++] = cstart + cworkA[l];
2687       }
2688       for (l=0; l<nzB; l++) {
2689         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2690           vals[lwrite]   = aworkB[l];
2691           cols[lwrite++] = ctmp;
2692         }
2693       }
2694       vals += ncols;
2695       cols += ncols;
2696       rptr[i+1] = rptr[i] + ncols;
2697       if (rownz_max < ncols) rownz_max = ncols;
2698     }
2699     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2700   } else { /* only copy matrix values into sbuf_a */
2701     rptr = sbuf_j;
2702     vals = sbuf_a;
2703     rptr[0] = 0;
2704     for (i=0; i<rend-rstart; i++) {
2705       row = i + rstart;
2706       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2707       ncols  = nzA + nzB;
2708       cworkB = b->j + b->i[i];
2709       aworkA = a->a + a->i[i];
2710       aworkB = b->a + b->i[i];
2711       lwrite = 0;
2712       for (l=0; l<nzB; l++) {
2713         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2714       }
2715       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2716       for (l=0; l<nzB; l++) {
2717         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2718       }
2719       vals += ncols;
2720       rptr[i+1] = rptr[i] + ncols;
2721     }
2722   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2723 
2724   /* send nzlocal to others, and recv other's nzlocal */
2725   /*--------------------------------------------------*/
2726   if (reuse == MAT_INITIAL_MATRIX) {
2727     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2728     s_waits2 = s_waits3 + nsends;
2729     s_waits1 = s_waits2 + nsends;
2730     r_waits1 = s_waits1 + nsends;
2731     r_waits2 = r_waits1 + nrecvs;
2732     r_waits3 = r_waits2 + nrecvs;
2733   } else {
2734     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2735     r_waits3 = s_waits3 + nsends;
2736   }
2737 
2738   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2739   if (reuse == MAT_INITIAL_MATRIX) {
2740     /* get new tags to keep the communication clean */
2741     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2742     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2743     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2744 
2745     /* post receives of other's nzlocal */
2746     for (i=0; i<nrecvs; i++) {
2747       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2748     }
2749     /* send nzlocal to others */
2750     for (i=0; i<nsends; i++) {
2751       sbuf_nz[i] = nzlocal;
2752       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2753     }
2754     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2755     count = nrecvs;
2756     while (count) {
2757       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2758       recv_rank[imdex] = recv_status.MPI_SOURCE;
2759       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2760       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2761 
2762       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2763       rbuf_nz[imdex] += i + 2;
2764       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2765       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2766       count--;
2767     }
2768     /* wait on sends of nzlocal */
2769     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2770     /* send mat->i,j to others, and recv from other's */
2771     /*------------------------------------------------*/
2772     for (i=0; i<nsends; i++) {
2773       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2774       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2775     }
2776     /* wait on receives of mat->i,j */
2777     /*------------------------------*/
2778     count = nrecvs;
2779     while (count) {
2780       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2781       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2782       count--;
2783     }
2784     /* wait on sends of mat->i,j */
2785     /*---------------------------*/
2786     if (nsends) {
2787       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2788     }
2789   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2790 
2791   /* post receives, send and receive mat->a */
2792   /*----------------------------------------*/
2793   for (imdex=0; imdex<nrecvs; imdex++) {
2794     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2795   }
2796   for (i=0; i<nsends; i++) {
2797     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2798   }
2799   count = nrecvs;
2800   while (count) {
2801     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2802     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2803     count--;
2804   }
2805   if (nsends) {
2806     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2807   }
2808 
2809   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2810 
2811   /* create redundant matrix */
2812   /*-------------------------*/
2813   if (reuse == MAT_INITIAL_MATRIX) {
2814     /* compute rownz_max for preallocation */
2815     for (imdex=0; imdex<nrecvs; imdex++) {
2816       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2817       rptr = rbuf_j[imdex];
2818       for (i=0; i<j; i++) {
2819         ncols = rptr[i+1] - rptr[i];
2820         if (rownz_max < ncols) rownz_max = ncols;
2821       }
2822     }
2823 
2824     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2825     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2826     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2827     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2828     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2829     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2830   } else {
2831     C = *matredundant;
2832   }
2833 
2834   /* insert local matrix entries */
2835   rptr = sbuf_j;
2836   cols = sbuf_j + rend-rstart + 1;
2837   vals = sbuf_a;
2838   for (i=0; i<rend-rstart; i++) {
2839     row   = i + rstart;
2840     ncols = rptr[i+1] - rptr[i];
2841     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2842     vals += ncols;
2843     cols += ncols;
2844   }
2845   /* insert received matrix entries */
2846   for (imdex=0; imdex<nrecvs; imdex++) {
2847     rstart = rowrange[recv_rank[imdex]];
2848     rend   = rowrange[recv_rank[imdex]+1];
2849     rptr = rbuf_j[imdex];
2850     cols = rbuf_j[imdex] + rend-rstart + 1;
2851     vals = rbuf_a[imdex];
2852     for (i=0; i<rend-rstart; i++) {
2853       row   = i + rstart;
2854       ncols = rptr[i+1] - rptr[i];
2855       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2856       vals += ncols;
2857       cols += ncols;
2858     }
2859   }
2860   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2863   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2864   if (reuse == MAT_INITIAL_MATRIX) {
2865     PetscContainer container;
2866     *matredundant = C;
2867     /* create a supporting struct and attach it to C for reuse */
2868     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2869     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2870     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2871     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2872     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2873     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2874 
2875     redund->nzlocal = nzlocal;
2876     redund->nsends  = nsends;
2877     redund->nrecvs  = nrecvs;
2878     redund->send_rank = send_rank;
2879     redund->recv_rank = recv_rank;
2880     redund->sbuf_nz = sbuf_nz;
2881     redund->rbuf_nz = rbuf_nz;
2882     redund->sbuf_j  = sbuf_j;
2883     redund->sbuf_a  = sbuf_a;
2884     redund->rbuf_j  = rbuf_j;
2885     redund->rbuf_a  = rbuf_a;
2886 
2887     redund->Destroy = C->ops->destroy;
2888     C->ops->destroy = MatDestroy_MatRedundant;
2889   }
2890   PetscFunctionReturn(0);
2891 }
2892 
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2895 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2896 {
2897   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2898   PetscErrorCode ierr;
2899   PetscInt       i,*idxb = 0;
2900   PetscScalar    *va,*vb;
2901   Vec            vtmp;
2902 
2903   PetscFunctionBegin;
2904   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2905   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2906   if (idx) {
2907     for (i=0; i<A->rmap->n; i++) {
2908       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2909     }
2910   }
2911 
2912   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2913   if (idx) {
2914     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2915   }
2916   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2917   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2918 
2919   for (i=0; i<A->rmap->n; i++) {
2920     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2921       va[i] = vb[i];
2922       if (idx) idx[i] = a->garray[idxb[i]];
2923     }
2924   }
2925 
2926   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2928   ierr = PetscFree(idxb);CHKERRQ(ierr);
2929   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2930   PetscFunctionReturn(0);
2931 }
2932 
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2935 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2936 {
2937   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2938   PetscErrorCode ierr;
2939   PetscInt       i,*idxb = 0;
2940   PetscScalar    *va,*vb;
2941   Vec            vtmp;
2942 
2943   PetscFunctionBegin;
2944   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2945   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2946   if (idx) {
2947     for (i=0; i<A->rmap->n; i++) {
2948       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2949     }
2950   }
2951 
2952   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2953   if (idx) {
2954     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2955   }
2956   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2957   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2958 
2959   for (i=0; i<A->rmap->n; i++) {
2960     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2961       va[i] = vb[i];
2962       if (idx) idx[i] = a->garray[idxb[i]];
2963     }
2964   }
2965 
2966   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2967   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2968   ierr = PetscFree(idxb);CHKERRQ(ierr);
2969   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2975 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2976 {
2977   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2978   PetscInt       n      = A->rmap->n;
2979   PetscInt       cstart = A->cmap->rstart;
2980   PetscInt      *cmap   = mat->garray;
2981   PetscInt      *diagIdx, *offdiagIdx;
2982   Vec            diagV, offdiagV;
2983   PetscScalar   *a, *diagA, *offdiagA;
2984   PetscInt       r;
2985   PetscErrorCode ierr;
2986 
2987   PetscFunctionBegin;
2988   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2989   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2990   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2991   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2992   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2993   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2994   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2995   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   for (r = 0; r < n; ++r) {
2997     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2998       a[r]   = diagA[r];
2999       idx[r] = cstart + diagIdx[r];
3000     } else {
3001       a[r]   = offdiagA[r];
3002       idx[r] = cmap[offdiagIdx[r]];
3003     }
3004   }
3005   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3006   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3007   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3008   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3009   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3010   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013 
3014 #undef __FUNCT__
3015 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3016 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3017 {
3018   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3019   PetscInt       n      = A->rmap->n;
3020   PetscInt       cstart = A->cmap->rstart;
3021   PetscInt      *cmap   = mat->garray;
3022   PetscInt      *diagIdx, *offdiagIdx;
3023   Vec            diagV, offdiagV;
3024   PetscScalar   *a, *diagA, *offdiagA;
3025   PetscInt       r;
3026   PetscErrorCode ierr;
3027 
3028   PetscFunctionBegin;
3029   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3030   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3031   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3032   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3033   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3034   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3035   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3036   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3037   for (r = 0; r < n; ++r) {
3038     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3039       a[r]   = diagA[r];
3040       idx[r] = cstart + diagIdx[r];
3041     } else {
3042       a[r]   = offdiagA[r];
3043       idx[r] = cmap[offdiagIdx[r]];
3044     }
3045   }
3046   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3047   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3048   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3049   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3050   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3051   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 #undef __FUNCT__
3056 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3057 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3058 {
3059   PetscErrorCode ierr;
3060   Mat            *dummy;
3061 
3062   PetscFunctionBegin;
3063   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3064   *newmat = *dummy;
3065   ierr = PetscFree(dummy);CHKERRQ(ierr);
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3073 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3074 {
3075   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3076   PetscErrorCode ierr;
3077 
3078   PetscFunctionBegin;
3079   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3080   PetscFunctionReturn(0);
3081 }
3082 
3083 #undef __FUNCT__
3084 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3085 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3086 {
3087   PetscErrorCode ierr;
3088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3089 
3090   PetscFunctionBegin;
3091   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3092   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3093   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3094   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 /* -------------------------------------------------------------------*/
3099 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3100        MatGetRow_MPIAIJ,
3101        MatRestoreRow_MPIAIJ,
3102        MatMult_MPIAIJ,
3103 /* 4*/ MatMultAdd_MPIAIJ,
3104        MatMultTranspose_MPIAIJ,
3105        MatMultTransposeAdd_MPIAIJ,
3106 #if defined(PETSC_HAVE_PBGL)
3107        MatSolve_MPIAIJ,
3108 #else
3109        0,
3110 #endif
3111        0,
3112        0,
3113 /*10*/ 0,
3114        0,
3115        0,
3116        MatSOR_MPIAIJ,
3117        MatTranspose_MPIAIJ,
3118 /*15*/ MatGetInfo_MPIAIJ,
3119        MatEqual_MPIAIJ,
3120        MatGetDiagonal_MPIAIJ,
3121        MatDiagonalScale_MPIAIJ,
3122        MatNorm_MPIAIJ,
3123 /*20*/ MatAssemblyBegin_MPIAIJ,
3124        MatAssemblyEnd_MPIAIJ,
3125        MatSetOption_MPIAIJ,
3126        MatZeroEntries_MPIAIJ,
3127 /*24*/ MatZeroRows_MPIAIJ,
3128        0,
3129 #if defined(PETSC_HAVE_PBGL)
3130        0,
3131 #else
3132        0,
3133 #endif
3134        0,
3135        0,
3136 /*29*/ MatSetUp_MPIAIJ,
3137 #if defined(PETSC_HAVE_PBGL)
3138        0,
3139 #else
3140        0,
3141 #endif
3142        0,
3143        0,
3144        0,
3145 /*34*/ MatDuplicate_MPIAIJ,
3146        0,
3147        0,
3148        0,
3149        0,
3150 /*39*/ MatAXPY_MPIAIJ,
3151        MatGetSubMatrices_MPIAIJ,
3152        MatIncreaseOverlap_MPIAIJ,
3153        MatGetValues_MPIAIJ,
3154        MatCopy_MPIAIJ,
3155 /*44*/ MatGetRowMax_MPIAIJ,
3156        MatScale_MPIAIJ,
3157        0,
3158        0,
3159        MatZeroRowsColumns_MPIAIJ,
3160 /*49*/ MatSetRandom_MPIAIJ,
3161        0,
3162        0,
3163        0,
3164        0,
3165 /*54*/ MatFDColoringCreate_MPIAIJ,
3166        0,
3167        MatSetUnfactored_MPIAIJ,
3168        MatPermute_MPIAIJ,
3169        0,
3170 /*59*/ MatGetSubMatrix_MPIAIJ,
3171        MatDestroy_MPIAIJ,
3172        MatView_MPIAIJ,
3173        0,
3174        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3175 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3176        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3177        0,
3178        0,
3179        0,
3180 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3181        MatGetRowMinAbs_MPIAIJ,
3182        0,
3183        MatSetColoring_MPIAIJ,
3184        0,
3185        MatSetValuesAdifor_MPIAIJ,
3186 /*75*/ MatFDColoringApply_AIJ,
3187        0,
3188        0,
3189        0,
3190        MatFindZeroDiagonals_MPIAIJ,
3191 /*80*/ 0,
3192        0,
3193        0,
3194 /*83*/ MatLoad_MPIAIJ,
3195        0,
3196        0,
3197        0,
3198        0,
3199        0,
3200 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3201        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3202        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3203        MatPtAP_MPIAIJ_MPIAIJ,
3204        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3205 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3206        0,
3207        0,
3208        0,
3209        0,
3210 /*99*/ 0,
3211        0,
3212        0,
3213        MatConjugate_MPIAIJ,
3214        0,
3215 /*104*/MatSetValuesRow_MPIAIJ,
3216        MatRealPart_MPIAIJ,
3217        MatImaginaryPart_MPIAIJ,
3218        0,
3219        0,
3220 /*109*/0,
3221        MatGetRedundantMatrix_MPIAIJ,
3222        MatGetRowMin_MPIAIJ,
3223        0,
3224        0,
3225 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3226        0,
3227        0,
3228        0,
3229        0,
3230 /*119*/0,
3231        0,
3232        0,
3233        0,
3234        MatGetMultiProcBlock_MPIAIJ,
3235 /*124*/MatFindNonzeroRows_MPIAIJ,
3236        MatGetColumnNorms_MPIAIJ,
3237        MatInvertBlockDiagonal_MPIAIJ,
3238        0,
3239        MatGetSubMatricesParallel_MPIAIJ,
3240 /*129*/0,
3241        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3242        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3243        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3244        0,
3245 /*134*/0,
3246        0,
3247        0,
3248        0,
3249        0
3250 };
3251 
3252 /* ----------------------------------------------------------------------------------------*/
3253 
3254 EXTERN_C_BEGIN
3255 #undef __FUNCT__
3256 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3257 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3258 {
3259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3260   PetscErrorCode ierr;
3261 
3262   PetscFunctionBegin;
3263   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3264   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 EXTERN_C_END
3268 
3269 EXTERN_C_BEGIN
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3272 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3273 {
3274   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3275   PetscErrorCode ierr;
3276 
3277   PetscFunctionBegin;
3278   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3279   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3280   PetscFunctionReturn(0);
3281 }
3282 EXTERN_C_END
3283 
3284 EXTERN_C_BEGIN
3285 #undef __FUNCT__
3286 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3287 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3288 {
3289   Mat_MPIAIJ     *b;
3290   PetscErrorCode ierr;
3291   PetscInt       i;
3292   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3293 
3294   PetscFunctionBegin;
3295   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3296   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3297   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3298   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3299   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3300   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3301 
3302   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3303   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3304   if (d_nnz) {
3305     for (i=0; i<B->rmap->n; i++) {
3306       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3307     }
3308   }
3309   if (o_nnz) {
3310     for (i=0; i<B->rmap->n; i++) {
3311       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3312     }
3313   }
3314   b = (Mat_MPIAIJ*)B->data;
3315 
3316   if (!B->preallocated) {
3317     /* Explicitly create 2 MATSEQAIJ matrices. */
3318     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3319     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3320     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3321     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3322     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3323     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3324     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3325     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3326     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3327     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3328   }
3329 
3330   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3331   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3332   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3333   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3334   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3335   B->preallocated = PETSC_TRUE;
3336   PetscFunctionReturn(0);
3337 }
3338 EXTERN_C_END
3339 
3340 #undef __FUNCT__
3341 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3342 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3343 {
3344   Mat            mat;
3345   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3346   PetscErrorCode ierr;
3347 
3348   PetscFunctionBegin;
3349   *newmat       = 0;
3350   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3351   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3352   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3353   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3354   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3355   a    = (Mat_MPIAIJ*)mat->data;
3356 
3357   mat->factortype    = matin->factortype;
3358   mat->rmap->bs      = matin->rmap->bs;
3359   mat->cmap->bs      = matin->cmap->bs;
3360   mat->assembled    = PETSC_TRUE;
3361   mat->insertmode   = NOT_SET_VALUES;
3362   mat->preallocated = PETSC_TRUE;
3363 
3364   a->size           = oldmat->size;
3365   a->rank           = oldmat->rank;
3366   a->donotstash     = oldmat->donotstash;
3367   a->roworiented    = oldmat->roworiented;
3368   a->rowindices     = 0;
3369   a->rowvalues      = 0;
3370   a->getrowactive   = PETSC_FALSE;
3371 
3372   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3373   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3374 
3375   if (oldmat->colmap) {
3376 #if defined (PETSC_USE_CTABLE)
3377     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3378 #else
3379     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3380     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3381     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3382 #endif
3383   } else a->colmap = 0;
3384   if (oldmat->garray) {
3385     PetscInt len;
3386     len  = oldmat->B->cmap->n;
3387     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3388     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3389     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3390   } else a->garray = 0;
3391 
3392   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3393   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3394   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3395   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3396   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3397   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3398   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3399   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3400   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3401   *newmat = mat;
3402   PetscFunctionReturn(0);
3403 }
3404 
3405 
3406 
3407 #undef __FUNCT__
3408 #define __FUNCT__ "MatLoad_MPIAIJ"
3409 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3410 {
3411   PetscScalar    *vals,*svals;
3412   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3413   PetscErrorCode ierr;
3414   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3415   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3416   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3417   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3418   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3419   int            fd;
3420   PetscInt       bs = 1;
3421 
3422   PetscFunctionBegin;
3423   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3424   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3425   if (!rank) {
3426     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3427     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3428     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3429   }
3430 
3431   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3432   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3433   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3434 
3435   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3436 
3437   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3438   M = header[1]; N = header[2];
3439   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3440   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3441   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3442 
3443   /* If global sizes are set, check if they are consistent with that given in the file */
3444   if (sizesset) {
3445     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3446   }
3447   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3448   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3449 
3450   /* determine ownership of all (block) rows */
3451   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3452   if (newMat->rmap->n < 0) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3453   else m = newMat->rmap->n; /* Set by user */
3454 
3455   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3456   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3457 
3458   /* First process needs enough room for process with most rows */
3459   if (!rank) {
3460     mmax = rowners[1];
3461     for (i=2; i<=size; i++) {
3462       mmax = PetscMax(mmax, rowners[i]);
3463     }
3464   } else mmax = m;
3465 
3466   rowners[0] = 0;
3467   for (i=2; i<=size; i++) {
3468     rowners[i] += rowners[i-1];
3469   }
3470   rstart = rowners[rank];
3471   rend   = rowners[rank+1];
3472 
3473   /* distribute row lengths to all processors */
3474   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3475   if (!rank) {
3476     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3477     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3478     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3479     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3480     for (j=0; j<m; j++) {
3481       procsnz[0] += ourlens[j];
3482     }
3483     for (i=1; i<size; i++) {
3484       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3485       /* calculate the number of nonzeros on each processor */
3486       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3487         procsnz[i] += rowlengths[j];
3488       }
3489       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3490     }
3491     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3492   } else {
3493     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3494   }
3495 
3496   if (!rank) {
3497     /* determine max buffer needed and allocate it */
3498     maxnz = 0;
3499     for (i=0; i<size; i++) {
3500       maxnz = PetscMax(maxnz,procsnz[i]);
3501     }
3502     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3503 
3504     /* read in my part of the matrix column indices  */
3505     nz   = procsnz[0];
3506     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3507     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3508 
3509     /* read in everyone else's and ship off */
3510     for (i=1; i<size; i++) {
3511       nz     = procsnz[i];
3512       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3513       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3514     }
3515     ierr = PetscFree(cols);CHKERRQ(ierr);
3516   } else {
3517     /* determine buffer space needed for message */
3518     nz = 0;
3519     for (i=0; i<m; i++) {
3520       nz += ourlens[i];
3521     }
3522     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3523 
3524     /* receive message of column indices*/
3525     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3526   }
3527 
3528   /* determine column ownership if matrix is not square */
3529   if (N != M) {
3530     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3531     else n = newMat->cmap->n;
3532     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3533     cstart = cend - n;
3534   } else {
3535     cstart = rstart;
3536     cend   = rend;
3537     n      = cend - cstart;
3538   }
3539 
3540   /* loop over local rows, determining number of off diagonal entries */
3541   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3542   jj = 0;
3543   for (i=0; i<m; i++) {
3544     for (j=0; j<ourlens[i]; j++) {
3545       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3546       jj++;
3547     }
3548   }
3549 
3550   for (i=0; i<m; i++) {
3551     ourlens[i] -= offlens[i];
3552   }
3553   if (!sizesset) {
3554     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3555   }
3556 
3557   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3558 
3559   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3560 
3561   for (i=0; i<m; i++) {
3562     ourlens[i] += offlens[i];
3563   }
3564 
3565   if (!rank) {
3566     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3567 
3568     /* read in my part of the matrix numerical values  */
3569     nz   = procsnz[0];
3570     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3571 
3572     /* insert into matrix */
3573     jj      = rstart;
3574     smycols = mycols;
3575     svals   = vals;
3576     for (i=0; i<m; i++) {
3577       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3578       smycols += ourlens[i];
3579       svals   += ourlens[i];
3580       jj++;
3581     }
3582 
3583     /* read in other processors and ship out */
3584     for (i=1; i<size; i++) {
3585       nz     = procsnz[i];
3586       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3587       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3588     }
3589     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3590   } else {
3591     /* receive numeric values */
3592     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3593 
3594     /* receive message of values*/
3595     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3596 
3597     /* insert into matrix */
3598     jj      = rstart;
3599     smycols = mycols;
3600     svals   = vals;
3601     for (i=0; i<m; i++) {
3602       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3603       smycols += ourlens[i];
3604       svals   += ourlens[i];
3605       jj++;
3606     }
3607   }
3608   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3609   ierr = PetscFree(vals);CHKERRQ(ierr);
3610   ierr = PetscFree(mycols);CHKERRQ(ierr);
3611   ierr = PetscFree(rowners);CHKERRQ(ierr);
3612   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3613   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3614   PetscFunctionReturn(0);
3615 }
3616 
3617 #undef __FUNCT__
3618 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3619 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3620 {
3621   PetscErrorCode ierr;
3622   IS             iscol_local;
3623   PetscInt       csize;
3624 
3625   PetscFunctionBegin;
3626   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3627   if (call == MAT_REUSE_MATRIX) {
3628     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3629     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3630   } else {
3631     PetscInt cbs;
3632     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3633     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3634     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3635   }
3636   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3637   if (call == MAT_INITIAL_MATRIX) {
3638     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3639     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3640   }
3641   PetscFunctionReturn(0);
3642 }
3643 
3644 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3645 #undef __FUNCT__
3646 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3647 /*
3648     Not great since it makes two copies of the submatrix, first a SeqAIJ
3649   in local and then the end result by concatenating the local matrices.
3650   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3651 
3652   Note: This requires a sequential iscol with all indices.
3653 */
3654 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3655 {
3656   PetscErrorCode ierr;
3657   PetscMPIInt    rank,size;
3658   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3659   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3660   PetscBool      allcolumns, colflag;
3661   Mat            M,Mreuse;
3662   MatScalar      *vwork,*aa;
3663   MPI_Comm       comm = ((PetscObject)mat)->comm;
3664   Mat_SeqAIJ     *aij;
3665 
3666 
3667   PetscFunctionBegin;
3668   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3669   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3670 
3671   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3672   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3673   if (colflag && ncol == mat->cmap->N) {
3674     allcolumns = PETSC_TRUE;
3675   } else {
3676     allcolumns = PETSC_FALSE;
3677   }
3678   if (call ==  MAT_REUSE_MATRIX) {
3679     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3680     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3681     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3682   } else {
3683     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3684   }
3685 
3686   /*
3687       m - number of local rows
3688       n - number of columns (same on all processors)
3689       rstart - first row in new global matrix generated
3690   */
3691   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3692   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3693   if (call == MAT_INITIAL_MATRIX) {
3694     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3695     ii  = aij->i;
3696     jj  = aij->j;
3697 
3698     /*
3699         Determine the number of non-zeros in the diagonal and off-diagonal
3700         portions of the matrix in order to do correct preallocation
3701     */
3702 
3703     /* first get start and end of "diagonal" columns */
3704     if (csize == PETSC_DECIDE) {
3705       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3706       if (mglobal == n) { /* square matrix */
3707         nlocal = m;
3708       } else {
3709         nlocal = n/size + ((n % size) > rank);
3710       }
3711     } else {
3712       nlocal = csize;
3713     }
3714     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3715     rstart = rend - nlocal;
3716     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3717 
3718     /* next, compute all the lengths */
3719     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3720     olens = dlens + m;
3721     for (i=0; i<m; i++) {
3722       jend = ii[i+1] - ii[i];
3723       olen = 0;
3724       dlen = 0;
3725       for (j=0; j<jend; j++) {
3726         if (*jj < rstart || *jj >= rend) olen++;
3727         else dlen++;
3728         jj++;
3729       }
3730       olens[i] = olen;
3731       dlens[i] = dlen;
3732     }
3733     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3734     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3735     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3736     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3737     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3738     ierr = PetscFree(dlens);CHKERRQ(ierr);
3739   } else {
3740     PetscInt ml,nl;
3741 
3742     M = *newmat;
3743     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3744     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3745     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3746     /*
3747          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3748        rather than the slower MatSetValues().
3749     */
3750     M->was_assembled = PETSC_TRUE;
3751     M->assembled     = PETSC_FALSE;
3752   }
3753   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3754   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3755   ii  = aij->i;
3756   jj  = aij->j;
3757   aa  = aij->a;
3758   for (i=0; i<m; i++) {
3759     row   = rstart + i;
3760     nz    = ii[i+1] - ii[i];
3761     cwork = jj;     jj += nz;
3762     vwork = aa;     aa += nz;
3763     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3764   }
3765 
3766   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3767   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3768   *newmat = M;
3769 
3770   /* save submatrix used in processor for next request */
3771   if (call ==  MAT_INITIAL_MATRIX) {
3772     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3773     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3774   }
3775   PetscFunctionReturn(0);
3776 }
3777 
3778 EXTERN_C_BEGIN
3779 #undef __FUNCT__
3780 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3781 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3782 {
3783   PetscInt       m,cstart, cend,j,nnz,i,d;
3784   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3785   const PetscInt *JJ;
3786   PetscScalar    *values;
3787   PetscErrorCode ierr;
3788 
3789   PetscFunctionBegin;
3790   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3791 
3792   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3793   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3794   m      = B->rmap->n;
3795   cstart = B->cmap->rstart;
3796   cend   = B->cmap->rend;
3797   rstart = B->rmap->rstart;
3798 
3799   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3800 
3801 #if defined(PETSC_USE_DEBUG)
3802   for (i=0; i<m; i++) {
3803     nnz     = Ii[i+1]- Ii[i];
3804     JJ      = J + Ii[i];
3805     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3806     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3807     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3808   }
3809 #endif
3810 
3811   for (i=0; i<m; i++) {
3812     nnz     = Ii[i+1]- Ii[i];
3813     JJ      = J + Ii[i];
3814     nnz_max = PetscMax(nnz_max,nnz);
3815     d       = 0;
3816     for (j=0; j<nnz; j++) {
3817       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3818     }
3819     d_nnz[i] = d;
3820     o_nnz[i] = nnz - d;
3821   }
3822   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3823   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3824 
3825   if (v) values = (PetscScalar*)v;
3826   else {
3827     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3828     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3829   }
3830 
3831   for (i=0; i<m; i++) {
3832     ii   = i + rstart;
3833     nnz  = Ii[i+1]- Ii[i];
3834     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3835   }
3836   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3837   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3838 
3839   if (!v) {
3840     ierr = PetscFree(values);CHKERRQ(ierr);
3841   }
3842   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3843   PetscFunctionReturn(0);
3844 }
3845 EXTERN_C_END
3846 
3847 #undef __FUNCT__
3848 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3849 /*@
3850    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3851    (the default parallel PETSc format).
3852 
3853    Collective on MPI_Comm
3854 
3855    Input Parameters:
3856 +  B - the matrix
3857 .  i - the indices into j for the start of each local row (starts with zero)
3858 .  j - the column indices for each local row (starts with zero)
3859 -  v - optional values in the matrix
3860 
3861    Level: developer
3862 
3863    Notes:
3864        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3865      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3866      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3867 
3868        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3869 
3870        The format used for the sparse matrix input is equivalent to a
3871     row-major ordering, i.e. for the following matrix, the input data expected is
3872     as shown:
3873 
3874         1 0 0
3875         2 0 3     P0
3876        -------
3877         4 5 6     P1
3878 
3879      Process0 [P0]: rows_owned=[0,1]
3880         i =  {0,1,3}  [size = nrow+1  = 2+1]
3881         j =  {0,0,2}  [size = nz = 3]
3882         v =  {1,2,3}  [size = nz = 3]
3883 
3884      Process1 [P1]: rows_owned=[2]
3885         i =  {0,3}    [size = nrow+1  = 1+1]
3886         j =  {0,1,2}  [size = nz = 3]
3887         v =  {4,5,6}  [size = nz = 3]
3888 
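     A minimal calling sketch for Process0 [P0] with the data above (assuming B has already
     been created with its type and local sizes set; the array names i0,j0,v0 and the use of
     ierr are illustrative, not taken from an existing example):

.vb
     PetscInt    i0[] = {0,1,3};
     PetscInt    j0[] = {0,0,2};
     PetscScalar v0[] = {1,2,3};
     ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);
.ve
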
3889 .keywords: matrix, aij, compressed row, sparse, parallel
3890 
3891 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3892           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3893 @*/
3894 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3895 {
3896   PetscErrorCode ierr;
3897 
3898   PetscFunctionBegin;
3899   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3900   PetscFunctionReturn(0);
3901 }
3902 
3903 #undef __FUNCT__
3904 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3905 /*@C
3906    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3907    (the default parallel PETSc format).  For good matrix assembly performance
3908    the user should preallocate the matrix storage by setting the parameters
3909    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3910    performance can be increased by more than a factor of 50.
3911 
3912    Collective on MPI_Comm
3913 
3914    Input Parameters:
3915 +  A - the matrix
3916 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3917            (same value is used for all local rows)
3918 .  d_nnz - array containing the number of nonzeros in the various rows of the
3919            DIAGONAL portion of the local submatrix (possibly different for each row)
3920            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3921            The size of this array is equal to the number of local rows, i.e 'm'.
3922            For matrices that will be factored, you must leave room for (and set)
3923            the diagonal entry even if it is zero.
3924 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3925            submatrix (same value is used for all local rows).
3926 -  o_nnz - array containing the number of nonzeros in the various rows of the
3927            OFF-DIAGONAL portion of the local submatrix (possibly different for
3928            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3929            structure. The size of this array is equal to the number
3930            of local rows, i.e 'm'.
3931 
3932    If the *_nnz parameter is given then the *_nz parameter is ignored
3933 
3934    The AIJ format (also called the Yale sparse matrix format or
3935    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3936    storage.  The stored row and column indices begin with zero.
3937    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3938 
3939    The parallel matrix is partitioned such that the first m0 rows belong to
3940    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3941    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3942 
3943    The DIAGONAL portion of the local submatrix of a processor can be defined
3944    as the submatrix which is obtained by extracting the part corresponding to
3945    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3946    first row that belongs to the processor, r2 is the last row belonging to
3947    this processor, and c1-c2 is the range of indices of the local part of a
3948    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3949    common case of a square matrix, the row and column ranges are the same and
3950    the DIAGONAL part is also square. The remaining portion of the local
3951    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3952 
3953    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3954 
3955    You can call MatGetInfo() to get information on how effective the preallocation was;
3956    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3957    You can also run with the option -info and look for messages with the string
3958    malloc in them to see if additional memory allocation was needed.
3959 
3960    Example usage:
3961 
3962    Consider the following 8x8 matrix with 34 non-zero values that is
3963    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3964    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3965    as follows:
3966 
3967 .vb
3968             1  2  0  |  0  3  0  |  0  4
3969     Proc0   0  5  6  |  7  0  0  |  8  0
3970             9  0 10  | 11  0  0  | 12  0
3971     -------------------------------------
3972            13  0 14  | 15 16 17  |  0  0
3973     Proc1   0 18  0  | 19 20 21  |  0  0
3974             0  0  0  | 22 23  0  | 24  0
3975     -------------------------------------
3976     Proc2  25 26 27  |  0  0 28  | 29  0
3977            30  0  0  | 31 32 33  |  0 34
3978 .ve
3979 
3980    This can be represented as a collection of submatrices as:
3981 
3982 .vb
3983       A B C
3984       D E F
3985       G H I
3986 .ve
3987 
3988    Where the submatrices A,B,C are owned by proc0, D,E,F are
3989    owned by proc1, G,H,I are owned by proc2.
3990 
3991    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3992    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3993    The 'M','N' parameters are 8,8, and have the same values on all procs.
3994 
3995    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3996    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3997    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3998    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3999    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4000    matrix, and [DF] as another SeqAIJ matrix.
4001 
4002    When d_nz, o_nz parameters are specified, d_nz storage elements are
4003    allocated for every row of the local diagonal submatrix, and o_nz
4004    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4005    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4006    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4007    In this case, the values of d_nz,o_nz are:
4008 .vb
4009      proc0 : d_nz = 2, o_nz = 2
4010      proc1 : d_nz = 3, o_nz = 2
4011      proc2 : d_nz = 1, o_nz = 4
4012 .ve
4013    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4014    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4015    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4016    34 values.
4017 
4018    When d_nnz, o_nnz parameters are specified, the storage is specified
4019    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4020    In the above case the values for d_nnz,o_nnz are:
4021 .vb
4022      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4023      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4024      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4025 .ve
4026    Here the space allocated is the sum of all the above values, i.e. 34, and
4027    hence the preallocation is perfect.
4028 
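   An illustrative call sequence for proc0 in the example above (one possibility;
   the names comm and A are placeholders, each process passes its own local sizes
   and d_nnz,o_nnz arrays, and error checking is omitted):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      MatCreate(comm,&A);
      MatSetSizes(A,3,3,8,8);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
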
4029    Level: intermediate
4030 
4031 .keywords: matrix, aij, compressed row, sparse, parallel
4032 
4033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4034           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4035 @*/
4036 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4037 {
4038   PetscErrorCode ierr;
4039 
4040   PetscFunctionBegin;
4041   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4042   PetscValidType(B,1);
4043   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4044   PetscFunctionReturn(0);
4045 }
4046 
4047 #undef __FUNCT__
4048 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4049 /*@
4050      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4051          CSR format.
4052 
4053    Collective on MPI_Comm
4054 
4055    Input Parameters:
4056 +  comm - MPI communicator
4057 .  m - number of local rows (Cannot be PETSC_DECIDE)
4058 .  n - This value should be the same as the local size used in creating the
4059        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4060        calculated if N is given) For square matrices n is almost always m.
4061 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4062 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4063 .   i - row indices
4064 .   j - column indices
4065 -   a - matrix values
4066 
4067    Output Parameter:
4068 .   mat - the matrix
4069 
4070    Level: intermediate
4071 
4072    Notes:
4073        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4074      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4075      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4076 
4077        The i and j indices are 0 based; the entries of i are offsets into the local j (and a) arrays.
4078 
4079        The format which is used for the sparse matrix input is equivalent to a
4080     row-major ordering, i.e. for the following matrix, the input data expected is
4081     as shown:
4082 
4083         1 0 0
4084         2 0 3     P0
4085        -------
4086         4 5 6     P1
4087 
4088      Process0 [P0]: rows_owned=[0,1]
4089         i =  {0,1,3}  [size = nrow+1  = 2+1]
4090         j =  {0,0,2}  [size = nz = 3]
4091         v =  {1,2,3}  [size = nz = 3]
4092 
4093      Process1 [P1]: rows_owned=[2]
4094         i =  {0,3}    [size = nrow+1  = 1+1]
4095         j =  {0,1,2}  [size = nz = 3]
4096         v =  {4,5,6}  [size = nz = 3]
4097 
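   An illustrative call for process P1 in the example above (one possibility; the
   names comm and mat are placeholders and error checking is omitted); the global
   matrix is 3x3 and P1 owns one row:
.vb
      PetscInt    i1[] = {0,3};
      PetscInt    j1[] = {0,1,2};
      PetscScalar v1[] = {4.0,5.0,6.0};
      MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,v1,&mat);
.ve
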
4098 .keywords: matrix, aij, compressed row, sparse, parallel
4099 
4100 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4101           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4102 @*/
4103 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4104 {
4105   PetscErrorCode ierr;
4106 
4107   PetscFunctionBegin;
4108   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4109   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4110   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4111   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4112   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4113   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4114   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4115   PetscFunctionReturn(0);
4116 }
4117 
4118 #undef __FUNCT__
4119 #define __FUNCT__ "MatCreateAIJ"
4120 /*@C
4121    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4122    (the default parallel PETSc format).  For good matrix assembly performance
4123    the user should preallocate the matrix storage by setting the parameters
4124    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4125    performance can be increased by more than a factor of 50.
4126 
4127    Collective on MPI_Comm
4128 
4129    Input Parameters:
4130 +  comm - MPI communicator
4131 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4132            This value should be the same as the local size used in creating the
4133            y vector for the matrix-vector product y = Ax.
4134 .  n - This value should be the same as the local size used in creating the
4135        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4136        calculated if N is given) For square matrices n is almost always m.
4137 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4138 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4139 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4140            (same value is used for all local rows)
4141 .  d_nnz - array containing the number of nonzeros in the various rows of the
4142            DIAGONAL portion of the local submatrix (possibly different for each row)
4143            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4144            The size of this array is equal to the number of local rows, i.e 'm'.
4145 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4146            submatrix (same value is used for all local rows).
4147 -  o_nnz - array containing the number of nonzeros in the various rows of the
4148            OFF-DIAGONAL portion of the local submatrix (possibly different for
4149            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4150            structure. The size of this array is equal to the number
4151            of local rows, i.e 'm'.
4152 
4153    Output Parameter:
4154 .  A - the matrix
4155 
4156    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4157    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4158    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4159 
4160    Notes:
4161    If the *_nnz parameter is given then the *_nz parameter is ignored
4162 
4163    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4164    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4165    storage requirements for this matrix.
4166 
4167    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4168    processor then it must be used on all processors that share the object for
4169    that argument.
4170 
4171    The user MUST specify either the local or global matrix dimensions
4172    (possibly both).
4173 
4174    The parallel matrix is partitioned across processors such that the
4175    first m0 rows belong to process 0, the next m1 rows belong to
4176    process 1, the next m2 rows belong to process 2, etc., where
4177    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4178    values corresponding to an [m x N] submatrix.
4179 
4180    The columns are logically partitioned with the n0 columns belonging
4181    to the 0th partition, the next n1 columns belonging to the next
4182    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4183 
4184    The DIAGONAL portion of the local submatrix on any given processor
4185    is the submatrix formed by the m rows and n columns owned by
4186    the given processor, i.e. the diagonal matrix on
4187    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4188    etc. The remaining portion of the local submatrix [m x (N-n)]
4189    constitutes the OFF-DIAGONAL portion. The example below better
4190    illustrates this concept.
4191 
4192    For a square global matrix we define each processor's diagonal portion
4193    to be its local rows and the corresponding columns (a square submatrix);
4194    each processor's off-diagonal portion encompasses the remainder of the
4195    local matrix (a rectangular submatrix).
4196 
4197    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4198 
4199    When calling this routine with a single process communicator, a matrix of
4200    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4201    type of communicator, use the construction mechanism:
4202      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4203 
4204    By default, this format uses inodes (identical nodes) when possible.
4205    We search for consecutive rows with the same nonzero structure, thereby
4206    reusing matrix information to achieve increased efficiency.
4207 
4208    Options Database Keys:
4209 +  -mat_no_inode  - Do not use inodes
4210 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4211 -  -mat_aij_oneindex - Internally use indexing starting at 1
4212         rather than 0.  Note that when calling MatSetValues(),
4213         the user still MUST index entries starting at 0!
4214 
4215 
4216    Example usage:
4217 
4218    Consider the following 8x8 matrix with 34 non-zero values that is
4219    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4220    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4221    as follows:
4222 
4223 .vb
4224             1  2  0  |  0  3  0  |  0  4
4225     Proc0   0  5  6  |  7  0  0  |  8  0
4226             9  0 10  | 11  0  0  | 12  0
4227     -------------------------------------
4228            13  0 14  | 15 16 17  |  0  0
4229     Proc1   0 18  0  | 19 20 21  |  0  0
4230             0  0  0  | 22 23  0  | 24  0
4231     -------------------------------------
4232     Proc2  25 26 27  |  0  0 28  | 29  0
4233            30  0  0  | 31 32 33  |  0 34
4234 .ve
4235 
4236    This can be represented as a collection of submatrices as:
4237 
4238 .vb
4239       A B C
4240       D E F
4241       G H I
4242 .ve
4243 
4244    Where the submatrices A,B,C are owned by proc0, D,E,F are
4245    owned by proc1, G,H,I are owned by proc2.
4246 
4247    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4248    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4249    The 'M','N' parameters are 8,8, and have the same values on all procs.
4250 
4251    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4252    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4253    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4254    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4255    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4256    matrix, and [DF] as another SeqAIJ matrix.
4257 
4258    When d_nz, o_nz parameters are specified, d_nz storage elements are
4259    allocated for every row of the local diagonal submatrix, and o_nz
4260    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4261    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4262    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4263    In this case, the values of d_nz,o_nz are:
4264 .vb
4265      proc0 : d_nz = 2, o_nz = 2
4266      proc1 : d_nz = 3, o_nz = 2
4267      proc2 : d_nz = 1, o_nz = 4
4268 .ve
4269    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4270    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4271    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4272    34 values.
4273 
4274    When d_nnz, o_nnz parameters are specified, the storage is specified
4275    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4276    In the above case the values for d_nnz,o_nnz are:
4277 .vb
4278      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4279      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4280      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4281 .ve
4282    Here the space allocated is the sum of all the above values, i.e. 34, and
4283    hence the preallocation is perfect.
4284 
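   An illustrative single call for proc0 in the example above (one possibility; the
   names comm and A are placeholders, each process passes its own local sizes and
   nnz arrays, and error checking is omitted):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
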
4285    Level: intermediate
4286 
4287 .keywords: matrix, aij, compressed row, sparse, parallel
4288 
4289 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4290           MPIAIJ, MatCreateMPIAIJWithArrays()
4291 @*/
4292 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4293 {
4294   PetscErrorCode ierr;
4295   PetscMPIInt    size;
4296 
4297   PetscFunctionBegin;
4298   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4299   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4300   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4301   if (size > 1) {
4302     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4303     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4304   } else {
4305     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4306     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4307   }
4308   PetscFunctionReturn(0);
4309 }
4310 
4311 #undef __FUNCT__
4312 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
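/* MatMPIAIJGetSeqAIJ - returns (without copying) the diagonal part Ad, the off-diagonal part Ao,
   and the map from local column indices of Ao to global column indices of an MPIAIJ matrix */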
4313 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4314 {
4315   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4316 
4317   PetscFunctionBegin;
4318   *Ad     = a->A;
4319   *Ao     = a->B;
4320   *colmap = a->garray;
4321   PetscFunctionReturn(0);
4322 }
4323 
4324 #undef __FUNCT__
4325 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4326 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4327 {
4328   PetscErrorCode ierr;
4329   PetscInt       i;
4330   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4331 
4332   PetscFunctionBegin;
4333   if (coloring->ctype == IS_COLORING_GLOBAL) {
4334     ISColoringValue *allcolors,*colors;
4335     ISColoring      ocoloring;
4336 
4337     /* set coloring for diagonal portion */
4338     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4339 
4340     /* set coloring for off-diagonal portion */
4341     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4342     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4343     for (i=0; i<a->B->cmap->n; i++) {
4344       colors[i] = allcolors[a->garray[i]];
4345     }
4346     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4347     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4348     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4349     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4350   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4351     ISColoringValue *colors;
4352     PetscInt        *larray;
4353     ISColoring      ocoloring;
4354 
4355     /* set coloring for diagonal portion */
4356     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4357     for (i=0; i<a->A->cmap->n; i++) {
4358       larray[i] = i + A->cmap->rstart;
4359     }
4360     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4361     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4362     for (i=0; i<a->A->cmap->n; i++) {
4363       colors[i] = coloring->colors[larray[i]];
4364     }
4365     ierr = PetscFree(larray);CHKERRQ(ierr);
4366     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4367     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4368     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4369 
4370     /* set coloring for off-diagonal portion */
4371     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4372     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4373     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4374     for (i=0; i<a->B->cmap->n; i++) {
4375       colors[i] = coloring->colors[larray[i]];
4376     }
4377     ierr = PetscFree(larray);CHKERRQ(ierr);
4378     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4379     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4380     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4381   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4382   PetscFunctionReturn(0);
4383 }
4384 
4385 #undef __FUNCT__
4386 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4387 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4388 {
4389   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4390   PetscErrorCode ierr;
4391 
4392   PetscFunctionBegin;
4393   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4394   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4395   PetscFunctionReturn(0);
4396 }
4397 
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4400 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4401 {
4402   PetscErrorCode ierr;
4403   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4404   PetscInt       *indx;
4405 
4406   PetscFunctionBegin;
4407   /* This routine will ONLY return MPIAIJ type matrix */
4408   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4409   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4410   if (n == PETSC_DECIDE) {
4411     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4412   }
4413   /* Check sum(n) = N */
4414   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4415   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4416 
4417   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4418   rstart -= m;
4419 
4420   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4421   for (i=0;i<m;i++) {
4422     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4423     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4424     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4425   }
4426 
4427   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4428   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4429   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4430   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4431   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4432   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 #undef __FUNCT__
4437 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4438 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4439 {
4440   PetscErrorCode ierr;
4441   PetscInt       m,N,i,rstart,nnz,Ii;
4442   PetscInt       *indx;
4443   PetscScalar    *values;
4444 
4445   PetscFunctionBegin;
4446   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4447   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4448   for (i=0;i<m;i++) {
4449     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4450     Ii    = i + rstart;
4451     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4452     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4453   }
4454   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4455   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4456   PetscFunctionReturn(0);
4457 }
4458 
4459 #undef __FUNCT__
4460 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4461 /*@
4462       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4463                  matrices from each processor
4464 
4465     Collective on MPI_Comm
4466 
4467    Input Parameters:
4468 +    comm - the communicator the parallel matrix will live on
4469 .    inmat - the input sequential matrices
4470 .    n - number of local columns (or PETSC_DECIDE)
4471 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4472 
4473    Output Parameter:
4474 .    outmat - the parallel matrix generated
4475 
4476     Level: advanced
4477 
4478    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4479 
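   An illustrative call (one possibility; each process passes its own inmat, the
   names comm and outmat are placeholders, and error checking is omitted):
.vb
      MatCreateMPIAIJConcatenateSeqAIJ(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve
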
4480 @*/
4481 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4482 {
4483   PetscErrorCode ierr;
4484 
4485   PetscFunctionBegin;
4486   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4487   if (scall == MAT_INITIAL_MATRIX) {
4488     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4489   }
4490   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4491   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4492   PetscFunctionReturn(0);
4493 }
4494 
4495 #undef __FUNCT__
4496 #define __FUNCT__ "MatFileSplit"
4497 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4498 {
4499   PetscErrorCode    ierr;
4500   PetscMPIInt       rank;
4501   PetscInt          m,N,i,rstart,nnz;
4502   size_t            len;
4503   const PetscInt    *indx;
4504   PetscViewer       out;
4505   char              *name;
4506   Mat               B;
4507   const PetscScalar *values;
4508 
4509   PetscFunctionBegin;
4510   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4511   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4512   /* Should this be the type of the diagonal block of A? */
4513   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4514   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4515   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4516   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4517   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4518   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4519   for (i=0;i<m;i++) {
4520     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4521     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4522     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4523   }
4524   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4525   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4526 
4527   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4528   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4529   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4530   sprintf(name,"%s.%d",outfile,rank);
4531   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4532   ierr = PetscFree(name);CHKERRQ(ierr);
4533   ierr = MatView(B,out);CHKERRQ(ierr);
4534   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4535   ierr = MatDestroy(&B);CHKERRQ(ierr);
4536   PetscFunctionReturn(0);
4537 }
4538 
4539 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4540 #undef __FUNCT__
4541 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4542 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4543 {
4544   PetscErrorCode       ierr;
4545   Mat_Merge_SeqsToMPI  *merge;
4546   PetscContainer       container;
4547 
4548   PetscFunctionBegin;
4549   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4550   if (container) {
4551     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4560     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4561     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4562     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4563     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4564     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4565     ierr = PetscFree(merge);CHKERRQ(ierr);
4566     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4567   }
4568   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4569   PetscFunctionReturn(0);
4570 }
4571 
4572 #include <../src/mat/utils/freespace.h>
4573 #include <petscbt.h>
4574 
4575 #undef __FUNCT__
4576 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4577 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4578 {
4579   PetscErrorCode       ierr;
4580   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4581   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4582   PetscMPIInt          size,rank,taga,*len_s;
4583   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4584   PetscInt             proc,m;
4585   PetscInt             **buf_ri,**buf_rj;
4586   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4587   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4588   MPI_Request          *s_waits,*r_waits;
4589   MPI_Status           *status;
4590   MatScalar            *aa=a->a;
4591   MatScalar            **abuf_r,*ba_i;
4592   Mat_Merge_SeqsToMPI  *merge;
4593   PetscContainer       container;
4594 
4595   PetscFunctionBegin;
4596   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4597 
4598   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4599   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4600 
4601   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4602   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4603 
4604   bi     = merge->bi;
4605   bj     = merge->bj;
4606   buf_ri = merge->buf_ri;
4607   buf_rj = merge->buf_rj;
4608 
4609   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4610   owners = merge->rowmap->range;
4611   len_s  = merge->len_s;
4612 
4613   /* send and recv matrix values */
4614   /*-----------------------------*/
4615   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4616   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4617 
4618   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4619   for (proc=0,k=0; proc<size; proc++) {
4620     if (!len_s[proc]) continue;
4621     i = owners[proc];
4622     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4623     k++;
4624   }
4625 
4626   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4627   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4628   ierr = PetscFree(status);CHKERRQ(ierr);
4629 
4630   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4631   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4632 
4633   /* insert mat values of mpimat */
4634   /*----------------------------*/
4635   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4636   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4637 
4638   for (k=0; k<merge->nrecv; k++) {
4639     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4640     nrows = *(buf_ri_k[k]);
4641     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4642     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4643   }
4644 
4645   /* set values of ba */
4646   m = merge->rowmap->n;
4647   for (i=0; i<m; i++) {
4648     arow = owners[rank] + i;
4649     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4650     bnzi = bi[i+1] - bi[i];
4651     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4652 
4653     /* add local non-zero vals of this proc's seqmat into ba */
4654     anzi = ai[arow+1] - ai[arow];
4655     aj   = a->j + ai[arow];
4656     aa   = a->a + ai[arow];
4657     nextaj = 0;
4658     for (j=0; nextaj<anzi; j++) {
4659       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4660         ba_i[j] += aa[nextaj++];
4661       }
4662     }
4663 
4664     /* add received vals into ba */
4665     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4666       /* i-th row */
4667       if (i == *nextrow[k]) {
4668         anzi = *(nextai[k]+1) - *nextai[k];
4669         aj   = buf_rj[k] + *(nextai[k]);
4670         aa   = abuf_r[k] + *(nextai[k]);
4671         nextaj = 0;
4672         for (j=0; nextaj<anzi; j++) {
4673           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4674             ba_i[j] += aa[nextaj++];
4675           }
4676         }
4677         nextrow[k]++; nextai[k]++;
4678       }
4679     }
4680     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4681   }
4682   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4683   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4684 
4685   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4686   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4687   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4688   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4689   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4690   PetscFunctionReturn(0);
4691 }
4692 
4693 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4694 
4695 #undef __FUNCT__
4696 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4697 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4698 {
4699   PetscErrorCode       ierr;
4700   Mat                  B_mpi;
4701   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4702   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4703   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4704   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4705   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4706   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4707   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4708   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4709   MPI_Status           *status;
4710   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4711   PetscBT              lnkbt;
4712   Mat_Merge_SeqsToMPI  *merge;
4713   PetscContainer       container;
4714 
4715   PetscFunctionBegin;
4716   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4717 
4718   /* make sure it is a PETSc comm */
4719   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4720   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4721   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4722 
4723   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4724   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4725 
4726   /* determine row ownership */
4727   /*---------------------------------------------------------*/
4728   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4729   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4730   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4731   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4732   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4733   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4734   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4735 
4736   m      = merge->rowmap->n;
4737   owners = merge->rowmap->range;
4738 
4739   /* determine the number of messages to send, their lengths */
4740   /*---------------------------------------------------------*/
4741   len_s  = merge->len_s;
4742 
4743   len = 0;  /* length of buf_si[] */
4744   merge->nsend = 0;
4745   for (proc=0; proc<size; proc++) {
4746     len_si[proc] = 0;
4747     if (proc == rank) {
4748       len_s[proc] = 0;
4749     } else {
4750       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4751       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4752     }
4753     if (len_s[proc]) {
4754       merge->nsend++;
4755       nrows = 0;
4756       for (i=owners[proc]; i<owners[proc+1]; i++) {
4757         if (ai[i+1] > ai[i]) nrows++;
4758       }
4759       len_si[proc] = 2*(nrows+1);
4760       len += len_si[proc];
4761     }
4762   }
4763 
4764   /* determine the number and length of messages to receive for ij-structure */
4765   /*-------------------------------------------------------------------------*/
4766   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4767   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4768 
4769   /* post the Irecv of j-structure */
4770   /*-------------------------------*/
4771   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4772   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4773 
4774   /* post the Isend of j-structure */
4775   /*--------------------------------*/
4776   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4777 
4778   for (proc=0, k=0; proc<size; proc++) {
4779     if (!len_s[proc]) continue;
4780     i = owners[proc];
4781     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4782     k++;
4783   }
4784 
4785   /* receives and sends of j-structure are complete */
4786   /*------------------------------------------------*/
4787   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4788   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4789 
4790   /* send and recv i-structure */
4791   /*---------------------------*/
4792   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4793   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4794 
4795   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4796   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4797   for (proc=0,k=0; proc<size; proc++) {
4798     if (!len_s[proc]) continue;
4799     /* form outgoing message for i-structure:
4800          buf_si[0]:                 nrows to be sent
4801                [1:nrows]:           row index (global)
4802                [nrows+1:2*nrows+1]: i-structure index
4803     */
4804     /*-------------------------------------------*/
4805     nrows = len_si[proc]/2 - 1;
4806     buf_si_i    = buf_si + nrows+1;
4807     buf_si[0]   = nrows;
4808     buf_si_i[0] = 0;
4809     nrows = 0;
4810     for (i=owners[proc]; i<owners[proc+1]; i++) {
4811       anzi = ai[i+1] - ai[i];
4812       if (anzi) {
4813         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4814         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4815         nrows++;
4816       }
4817     }
4818     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4819     k++;
4820     buf_si += len_si[proc];
4821   }
4822 
4823   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4824   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4825 
4826   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4827   for (i=0; i<merge->nrecv; i++) {
4828     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4829   }
4830 
4831   ierr = PetscFree(len_si);CHKERRQ(ierr);
4832   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4833   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4834   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4835   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4836   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4837   ierr = PetscFree(status);CHKERRQ(ierr);
4838 
4839   /* compute a local seq matrix in each processor */
4840   /*----------------------------------------------*/
4841   /* allocate bi array and free space for accumulating nonzero column info */
4842   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4843   bi[0] = 0;
4844 
4845   /* create and initialize a linked list */
4846   nlnk = N+1;
4847   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4848 
4849   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4850   len  = ai[owners[rank+1]] - ai[owners[rank]];
4851   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4852   current_space = free_space;
4853 
4854   /* determine symbolic info for each local row */
4855   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4856 
4857   for (k=0; k<merge->nrecv; k++) {
4858     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4859     nrows = *buf_ri_k[k];
4860     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4861     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4862   }
4863 
4864   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4865   len = 0;
4866   for (i=0;i<m;i++) {
4867     bnzi   = 0;
4868     /* add local non-zero cols of this proc's seqmat into lnk */
4869     arow   = owners[rank] + i;
4870     anzi   = ai[arow+1] - ai[arow];
4871     aj     = a->j + ai[arow];
4872     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4873     bnzi += nlnk;
4874     /* add received col data into lnk */
4875     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4876       if (i == *nextrow[k]) { /* i-th row */
4877         anzi = *(nextai[k]+1) - *nextai[k];
4878         aj   = buf_rj[k] + *nextai[k];
4879         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4880         bnzi += nlnk;
4881         nextrow[k]++; nextai[k]++;
4882       }
4883     }
4884     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4885 
4886     /* if free space is not available, make more free space */
4887     if (current_space->local_remaining<bnzi) {
4888       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4889       nspacedouble++;
4890     }
4891     /* copy data into free space, then initialize lnk */
4892     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4893     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4894 
4895     current_space->array           += bnzi;
4896     current_space->local_used      += bnzi;
4897     current_space->local_remaining -= bnzi;
4898 
4899     bi[i+1] = bi[i] + bnzi;
4900   }
4901 
4902   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4903 
4904   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4905   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4906   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4907 
4908   /* create symbolic parallel matrix B_mpi */
4909   /*---------------------------------------*/
4910   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4911   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4912   if (n==PETSC_DECIDE) {
4913     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4914   } else {
4915     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4916   }
4917   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4918   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4919   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4920   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4921   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4922 
4923   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4924   B_mpi->assembled     = PETSC_FALSE;
4925   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4926   merge->bi            = bi;
4927   merge->bj            = bj;
4928   merge->buf_ri        = buf_ri;
4929   merge->buf_rj        = buf_rj;
4930   merge->coi           = PETSC_NULL;
4931   merge->coj           = PETSC_NULL;
4932   merge->owners_co     = PETSC_NULL;
4933 
4934   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4935 
4936   /* attach the supporting struct to B_mpi for reuse */
4937   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4938   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4939   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4940   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4941   *mpimat = B_mpi;
4942 
4943   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4944   PetscFunctionReturn(0);
4945 }
4946 
4947 #undef __FUNCT__
4948 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4949 /*@C
4950       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4951                  matrices from each processor
4952 
4953     Collective on MPI_Comm
4954 
4955    Input Parameters:
4956 +    comm - the communicator the parallel matrix will live on
4957 .    seqmat - the input sequential matrices
4958 .    m - number of local rows (or PETSC_DECIDE)
4959 .    n - number of local columns (or PETSC_DECIDE)
4960 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4961 
4962    Output Parameter:
4963 .    mpimat - the parallel matrix generated
4964 
4965     Level: advanced
4966 
4967    Notes:
4968      The dimensions of the sequential matrix in each processor MUST be the same.
4969      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4970      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
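
     An illustrative call sketch (one possibility; each process passes its own seqmat,
     mpimat is a placeholder name, and error checking is omitted):
.vb
      MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve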
4971 @*/
4972 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4973 {
4974   PetscErrorCode   ierr;
4975   PetscMPIInt     size;
4976 
4977   PetscFunctionBegin;
4978   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4979   if (size == 1) {
4980      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4981      if (scall == MAT_INITIAL_MATRIX) {
4982        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4983      } else {
4984        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4985      }
4986      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4987      PetscFunctionReturn(0);
4988   }
4989   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4990   if (scall == MAT_INITIAL_MATRIX) {
4991     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4992   }
4993   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4994   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4995   PetscFunctionReturn(0);
4996 }
4997 
4998 #undef __FUNCT__
4999 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5000 /*@
5001      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5002           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5003           with MatGetSize()
5004 
5005     Not Collective
5006 
5007    Input Parameters:
5008 +    A - the matrix
5009 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5010 
5011    Output Parameter:
5012 .    A_loc - the local sequential matrix generated
5013 
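   Notes: an illustrative usage sketch (one possibility; error checking omitted); the
   MAT_REUSE_MATRIX call refreshes the values in an A_loc obtained earlier with
   MAT_INITIAL_MATRIX:
.vb
      Mat A_loc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(&A_loc);
.ve
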
5014     Level: developer
5015 
5016 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5017 
5018 @*/
5019 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5020 {
5021   PetscErrorCode  ierr;
5022   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5023   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5024   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5025   MatScalar       *aa=a->a,*ba=b->a,*cam;
5026   PetscScalar     *ca;
5027   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5028   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5029   PetscBool       match;
5030 
5031   PetscFunctionBegin;
5032   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5033   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5034   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5035   if (scall == MAT_INITIAL_MATRIX) {
5036     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5037     ci[0] = 0;
5038     for (i=0; i<am; i++) {
5039       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5040     }
5041     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5042     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5043     k = 0;
5044     for (i=0; i<am; i++) {
5045       ncols_o = bi[i+1] - bi[i];
5046       ncols_d = ai[i+1] - ai[i];
5047       /* off-diagonal portion of A */
5048       for (jo=0; jo<ncols_o; jo++) {
5049         col = cmap[*bj];
5050         if (col >= cstart) break;
5051         cj[k]   = col; bj++;
5052         ca[k++] = *ba++;
5053       }
5054       /* diagonal portion of A */
5055       for (j=0; j<ncols_d; j++) {
5056         cj[k]   = cstart + *aj++;
5057         ca[k++] = *aa++;
5058       }
5059       /* off-diagonal portion of A */
5060       for (j=jo; j<ncols_o; j++) {
5061         cj[k]   = cmap[*bj++];
5062         ca[k++] = *ba++;
5063       }
5064     }
5065     /* put together the new matrix */
5066     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5067     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5068     /* Since these are PETSc arrays, change flags to free them as necessary. */
5069     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5070     mat->free_a  = PETSC_TRUE;
5071     mat->free_ij = PETSC_TRUE;
5072     mat->nonew   = 0;
5073   } else if (scall == MAT_REUSE_MATRIX) {
5074     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5075     ci = mat->i; cj = mat->j; cam = mat->a;
5076     for (i=0; i<am; i++) {
5077       /* off-diagonal portion of A */
5078       ncols_o = bi[i+1] - bi[i];
5079       for (jo=0; jo<ncols_o; jo++) {
5080         col = cmap[*bj];
5081         if (col >= cstart) break;
5082         *cam++ = *ba++; bj++;
5083       }
5084       /* diagonal portion of A */
5085       ncols_d = ai[i+1] - ai[i];
5086       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5087       /* off-diagonal portion of A */
5088       for (j=jo; j<ncols_o; j++) {
5089         *cam++ = *ba++; bj++;
5090       }
5091     }
5092   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5093   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5094   PetscFunctionReturn(0);
5095 }
5096 
5097 #undef __FUNCT__
5098 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5099 /*@C
5100      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5101 
5102     Not Collective
5103 
5104    Input Parameters:
5105 +    A - the matrix
5106 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5107 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5108 
5109    Output Parameter:
5110 .    A_loc - the local sequential matrix generated
5111 
5112     Level: developer
5113 
5114 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5115 
5116 @*/
5117 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5118 {
5119   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5120   PetscErrorCode    ierr;
5121   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5122   IS                isrowa,iscola;
5123   Mat               *aloc;
5124   PetscBool       match;
5125 
5126   PetscFunctionBegin;
5127   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5128   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5129   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5130   if (!row) {
5131     start = A->rmap->rstart; end = A->rmap->rend;
5132     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5133   } else {
5134     isrowa = *row;
5135   }
5136   if (!col) {
5137     start = A->cmap->rstart;
5138     cmap  = a->garray;
5139     nzA   = a->A->cmap->n;
5140     nzB   = a->B->cmap->n;
5141     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5142     ncols = 0;
5143     for (i=0; i<nzB; i++) {
5144       if (cmap[i] < start) idx[ncols++] = cmap[i];
5145       else break;
5146     }
5147     imark = i;
5148     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5149     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5150     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5151   } else {
5152     iscola = *col;
5153   }
5154   if (scall != MAT_INITIAL_MATRIX) {
5155     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5156     aloc[0] = *A_loc;
5157   }
5158   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5159   *A_loc = aloc[0];
5160   ierr = PetscFree(aloc);CHKERRQ(ierr);
5161   if (!row) {
5162     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5163   }
5164   if (!col) {
5165     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5166   }
5167   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5168   PetscFunctionReturn(0);
5169 }
5170 
5171 #undef __FUNCT__
5172 #define __FUNCT__ "MatGetBrowsOfAcols"
5173 /*@C
5174     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5175 
5176     Collective on Mat
5177 
5178    Input Parameters:
5179 +    A,B - the matrices in mpiaij format
5180 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5181 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5182 
5183    Output Parameter:
5184 +    rowb, colb - index sets of rows and columns of B to extract
5185 -    B_seq - the sequential matrix generated
5186 
5187     Level: developer
5188 
5189 @*/
5190 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5191 {
5192   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5193   PetscErrorCode    ierr;
5194   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5195   IS                isrowb,iscolb;
5196   Mat               *bseq=PETSC_NULL;
5197 
5198   PetscFunctionBegin;
5199   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5200     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5201   }
5202   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5203 
5204   if (scall == MAT_INITIAL_MATRIX) {
5205     start = A->cmap->rstart;
5206     cmap  = a->garray;
5207     nzA   = a->A->cmap->n;
5208     nzB   = a->B->cmap->n;
5209     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5210     ncols = 0;
5211     for (i=0; i<nzB; i++) {  /* row < local row index */
5212       if (cmap[i] < start) idx[ncols++] = cmap[i];
5213       else break;
5214     }
5215     imark = i;
5216     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5217     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5218     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5219     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5220   } else {
5221     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5222     isrowb = *rowb; iscolb = *colb;
5223     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5224     bseq[0] = *B_seq;
5225   }
5226   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5227   *B_seq = bseq[0];
5228   ierr = PetscFree(bseq);CHKERRQ(ierr);
5229   if (!rowb) {
5230     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5231   } else {
5232     *rowb = isrowb;
5233   }
5234   if (!colb) {
5235     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5236   } else {
5237     *colb = iscolb;
5238   }
5239   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5240   PetscFunctionReturn(0);
5241 }
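/*
   Example usage (a hedged, illustrative sketch; assumes A and B are assembled MPIAIJ matrices
   with A's column layout matching B's row layout):

      IS  rowb,colb;
      Mat Bseq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ... change numerical values of B, keeping its nonzero pattern ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&Bseq);CHKERRQ(ierr);

   Passing PETSC_NULL for rowb and colb is allowed with MAT_INITIAL_MATRIX, in which case the
   index sets are destroyed internally and MAT_REUSE_MATRIX cannot be used later.
*/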
5242 
5243 #undef __FUNCT__
5244 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5245 /*
5246     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
5247     of the OFF-DIAGONAL portion of local A
5248 
5249     Collective on Mat
5250 
5251    Input Parameters:
5252 +    A,B - the matrices in mpiaij format
5253 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5254 
5255    Output Parameter:
5256 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5257 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5258 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5259 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5260 
5261     Level: developer
5262 
5263 */
5264 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5265 {
5266   VecScatter_MPI_General *gen_to,*gen_from;
5267   PetscErrorCode         ierr;
5268   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5269   Mat_SeqAIJ             *b_oth;
5270   VecScatter             ctx=a->Mvctx;
5271   MPI_Comm               comm=((PetscObject)ctx)->comm;
5272   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5273   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5274   PetscScalar            *rvalues,*svalues;
5275   MatScalar              *b_otha,*bufa,*bufA;
5276   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5277   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5278   MPI_Status             *sstatus,rstatus;
5279   PetscMPIInt            jj;
5280   PetscInt               *cols,sbs,rbs;
5281   PetscScalar            *vals;
5282 
5283   PetscFunctionBegin;
5284   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5285     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5286   }
5287   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5288   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5289 
5290   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5291   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5292   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
5293   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
5294   nrecvs   = gen_from->n;
5295   nsends   = gen_to->n;
5296 
5297   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5298   srow     = gen_to->indices;   /* local row index to be sent */
5299   sstarts  = gen_to->starts;
5300   sprocs   = gen_to->procs;
5301   sstatus  = gen_to->sstatus;
5302   sbs      = gen_to->bs;
5303   rstarts  = gen_from->starts;
5304   rprocs   = gen_from->procs;
5305   rbs      = gen_from->bs;
5306 
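  /* For MAT_INITIAL_MATRIX, three rounds of communication assemble B_oth: row lengths (i-array),
     column indices (j-array), and values (a-array) of the needed rows of B.  For MAT_REUSE_MATRIX,
     only the values are re-communicated, using the startsj/bufa data saved from the initial call. */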
5307   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5308   if (scall == MAT_INITIAL_MATRIX) {
5309     /* i-array */
5310     /*---------*/
5311     /*  post receives */
5312     for (i=0; i<nrecvs; i++) {
5313       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5314       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5315       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5316     }
5317 
5318     /* pack the outgoing message */
5319     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5320     sstartsj[0] = 0;  rstartsj[0] = 0;
5321     len = 0; /* total length of j or a array to be sent */
5322     k = 0;
5323     for (i=0; i<nsends; i++) {
5324       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5325       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5326       for (j=0; j<nrows; j++) {
5327         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5328         for (l=0; l<sbs; l++) {
5329           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5330           rowlen[j*sbs+l] = ncols;
5331           len += ncols;
5332           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5333         }
5334         k++;
5335       }
5336       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5337       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5338     }
5339     /* recvs and sends of i-array are completed */
5340     i = nrecvs;
5341     while (i--) {
5342       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5343     }
5344     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5345 
5346     /* allocate buffers for sending j and a arrays */
5347     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5348     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5349 
5350     /* create i-array of B_oth */
5351     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5352     b_othi[0] = 0;
5353     len = 0; /* total length of j or a array to be received */
5354     k = 0;
5355     for (i=0; i<nrecvs; i++) {
5356       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5357       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5358       for (j=0; j<nrows; j++) {
5359         b_othi[k+1] = b_othi[k] + rowlen[j];
5360         len += rowlen[j]; k++;
5361       }
5362       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5363     }
5364 
5365     /* allocate space for the j and a arrays of B_oth */
5366     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5367     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5368 
5369     /* j-array */
5370     /*---------*/
5371     /*  post receives of j-array */
5372     for (i=0; i<nrecvs; i++) {
5373       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5374       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5375     }
5376 
5377     /* pack the outgoing message j-array */
5378     k = 0;
5379     for (i=0; i<nsends; i++) {
5380       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5381       bufJ = bufj+sstartsj[i];
5382       for (j=0; j<nrows; j++) {
5383         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5384         for (ll=0; ll<sbs; ll++) {
5385           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5386           for (l=0; l<ncols; l++) {
5387             *bufJ++ = cols[l];
5388           }
5389           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5390         }
5391       }
5392       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5393     }
5394 
5395     /* recvs and sends of j-array are completed */
5396     i = nrecvs;
5397     while (i--) {
5398       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5399     }
5400     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5401   } else if (scall == MAT_REUSE_MATRIX) {
5402     sstartsj = *startsj_s;
5403     rstartsj = *startsj_r;
5404     bufa     = *bufa_ptr;
5405     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5406     b_otha   = b_oth->a;
5407   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse value");
5408 
5409   /* a-array */
5410   /*---------*/
5411   /*  post receives of a-array */
5412   for (i=0; i<nrecvs; i++) {
5413     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5414     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5415   }
5416 
5417   /* pack the outgoing message a-array */
5418   k = 0;
5419   for (i=0; i<nsends; i++) {
5420     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5421     bufA = bufa+sstartsj[i];
5422     for (j=0; j<nrows; j++) {
5423       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5424       for (ll=0; ll<sbs; ll++) {
5425         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5426         for (l=0; l<ncols; l++) {
5427           *bufA++ = vals[l];
5428         }
5429         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5430       }
5431     }
5432     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5433   }
5434   /* recvs and sends of a-array are completed */
5435   i = nrecvs;
5436   while (i--) {
5437     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5438   }
5439   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5440   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5441 
5442   if (scall == MAT_INITIAL_MATRIX) {
5443     /* put together the new matrix */
5444     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5445 
5446     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5447     /* Since these are PETSc arrays, change flags to free them as necessary. */
5448     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5449     b_oth->free_a  = PETSC_TRUE;
5450     b_oth->free_ij = PETSC_TRUE;
5451     b_oth->nonew   = 0;
5452 
5453     ierr = PetscFree(bufj);CHKERRQ(ierr);
5454     if (!startsj_s || !bufa_ptr) {
5455       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5456       ierr = PetscFree(bufa);CHKERRQ(ierr);
5457     } else {
5458       *startsj_s = sstartsj;
5459       *startsj_r = rstartsj;
5460       *bufa_ptr  = bufa;
5461     }
5462   }
5463   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5464   PetscFunctionReturn(0);
5465 }
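/*
   Calling-pattern sketch (hedged, illustrative only; assumes A and B are assembled MPIAIJ matrices
   with A's column layout matching B's row layout).  The startsj_s/startsj_r/bufa arrays filled by
   the initial call are passed back unchanged on reuse so the setup communication is not repeated;
   they were allocated above with PetscMalloc2()/PetscMalloc() and are freed by the caller:

      PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa      = PETSC_NULL;
      Mat       Both;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
      ... change numerical values of B, keeping its nonzero pattern ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&Both);CHKERRQ(ierr);
*/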
5466 
5467 #undef __FUNCT__
5468 #define __FUNCT__ "MatGetCommunicationStructs"
5469 /*@C
5470   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5471 
5472   Not Collective
5473 
5474   Input Parameters:
5475 . A - The matrix in mpiaij format
5476 
5477   Output Parameter:
5478 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5479 . colmap - A map from global column index to local index into lvec
5480 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5481 
5482   Level: developer
5483 
5484 @*/
5485 #if defined (PETSC_USE_CTABLE)
5486 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5487 #else
5488 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5489 #endif
5490 {
5491   Mat_MPIAIJ *a;
5492 
5493   PetscFunctionBegin;
5494   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5495   PetscValidPointer(lvec, 2);
5496   PetscValidPointer(colmap, 3);
5497   PetscValidPointer(multScatter, 4);
5498   a = (Mat_MPIAIJ *) A->data;
5499   if (lvec) *lvec = a->lvec;
5500   if (colmap) *colmap = a->colmap;
5501   if (multScatter) *multScatter = a->Mvctx;
5502   PetscFunctionReturn(0);
5503 }
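/*
   Example usage (a minimal sketch; assumes A is an assembled MATMPIAIJ matrix).  The returned
   objects are owned by the matrix and must not be destroyed by the caller:

      Vec        lvec;
      VecScatter scatter;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
*/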
5504 
5505 EXTERN_C_BEGIN
5506 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5507 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5508 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5509 EXTERN_C_END
5510 
5511 #undef __FUNCT__
5512 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5513 /*
5514     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5515 
5516                n                       p                          p
5517         (              )       (              )         (                  )
5518       m (      A       )  *  n (       B      )   =   m (         C        )
5519         (              )       (              )         (                  )
5520 
5521 */
5522 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5523 {
5524   PetscErrorCode     ierr;
5525   Mat                At,Bt,Ct;
5526 
5527   PetscFunctionBegin;
5528   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5529   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5530   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5531   ierr = MatDestroy(&At);CHKERRQ(ierr);
5532   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5533   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5534   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5535   PetscFunctionReturn(0);
5536 }
5537 
5538 #undef __FUNCT__
5539 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5540 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5541 {
5542   PetscErrorCode ierr;
5543   PetscInt       m=A->rmap->n,n=B->cmap->n;
5544   Mat            Cmat;
5545 
5546   PetscFunctionBegin;
5547   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5548   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5549   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5550   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5551   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5552   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5553   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5554   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5555 
5556   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5557   *C = Cmat;
5558   PetscFunctionReturn(0);
5559 }
5560 
5561 /* ----------------------------------------------------------------*/
5562 #undef __FUNCT__
5563 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5564 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5565 {
5566   PetscErrorCode ierr;
5567 
5568   PetscFunctionBegin;
5569   if (scall == MAT_INITIAL_MATRIX) {
5570     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5571   }
5572   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5573   PetscFunctionReturn(0);
5574 }
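/*
   Usage sketch (hedged): with A of type MATMPIDENSE and B of type MATMPIAIJ, a generic
   MatMatMult() call is expected to dispatch to MatMatMult_MPIDense_MPIAIJ() above through the
   composed function "MatMatMult_mpidense_mpiaij_C" registered in MatCreate_MPIAIJ() below:

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/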
5575 
5576 EXTERN_C_BEGIN
5577 #if defined(PETSC_HAVE_MUMPS)
5578 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5579 #endif
5580 #if defined(PETSC_HAVE_PASTIX)
5581 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5582 #endif
5583 #if defined(PETSC_HAVE_SUPERLU_DIST)
5584 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5585 #endif
5586 #if defined(PETSC_HAVE_CLIQUE)
5587 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5588 #endif
5589 EXTERN_C_END
5590 
5591 /*MC
5592    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5593 
5594    Options Database Keys:
5595 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5596 
5597   Level: beginner
5598 
5599 .seealso: MatCreateAIJ()
5600 M*/
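/*
   Example of creating a MATMPIAIJ matrix explicitly (a brief sketch using standard PETSc calls;
   m,n,M,N and the preallocation parameters d_nz,d_nnz,o_nz,o_nnz stand for the caller's values):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
      ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
*/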
5601 
5602 EXTERN_C_BEGIN
5603 #undef __FUNCT__
5604 #define __FUNCT__ "MatCreate_MPIAIJ"
5605 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5606 {
5607   Mat_MPIAIJ     *b;
5608   PetscErrorCode ierr;
5609   PetscMPIInt    size;
5610 
5611   PetscFunctionBegin;
5612   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5613   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5614   B->data         = (void*)b;
5615   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5616   B->assembled    = PETSC_FALSE;
5617   B->insertmode   = NOT_SET_VALUES;
5618   b->size         = size;
5619   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5620 
5621   /* build cache for off array entries formed */
5622   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5623   b->donotstash  = PETSC_FALSE;
5624   b->colmap      = 0;
5625   b->garray      = 0;
5626   b->roworiented = PETSC_TRUE;
5627 
5628   /* stuff used for matrix vector multiply */
5629   b->lvec      = PETSC_NULL;
5630   b->Mvctx     = PETSC_NULL;
5631 
5632   /* stuff for MatGetRow() */
5633   b->rowindices   = 0;
5634   b->rowvalues    = 0;
5635   b->getrowactive = PETSC_FALSE;
5636 
5637   /* flexible pointer used in CUSP/CUSPARSE classes */
5638   b->spptr        = PETSC_NULL;
5639 
5640 #if defined(PETSC_HAVE_MUMPS)
5641   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5642                                      "MatGetFactor_aij_mumps",
5643                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5644 #endif
5645 #if defined(PETSC_HAVE_PASTIX)
5646   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5647                                            "MatGetFactor_mpiaij_pastix",
5648                                            MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5649 #endif
5650 #if defined(PETSC_HAVE_SUPERLU_DIST)
5651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5652                                      "MatGetFactor_mpiaij_superlu_dist",
5653                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5654 #endif
5655 #if defined(PETSC_HAVE_CLIQUE)
5656   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5657                                      "MatGetFactor_aij_clique",
5658                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5659 #endif
5660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5661                                      "MatStoreValues_MPIAIJ",
5662                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5664                                      "MatRetrieveValues_MPIAIJ",
5665                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5667                                      "MatGetDiagonalBlock_MPIAIJ",
5668                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5670                                      "MatIsTranspose_MPIAIJ",
5671                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5673                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5674                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5676                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5677                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5678   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5679                                      "MatDiagonalScaleLocal_MPIAIJ",
5680                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5681   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5682                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5683                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5685                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5686                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5688                                      "MatConvert_MPIAIJ_MPISBAIJ",
5689                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5691                                      "MatMatMult_MPIDense_MPIAIJ",
5692                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5693   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5694                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5695                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5697                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5698                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5699   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5700   PetscFunctionReturn(0);
5701 }
5702 EXTERN_C_END
5703 
5704 #undef __FUNCT__
5705 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5706 /*@
5707      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5708          and "off-diagonal" parts of the matrix in CSR format.
5709 
5710    Collective on MPI_Comm
5711 
5712    Input Parameters:
5713 +  comm - MPI communicator
5714 .  m - number of local rows (Cannot be PETSC_DECIDE)
5715 .  n - This value should be the same as the local size used in creating the
5716        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5717        calculated if N is given). For square matrices n is almost always m.
5718 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5719 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5720 .   i - row indices for "diagonal" portion of matrix
5721 .   j - column indices
5722 .   a - matrix values
5723 .   oi - row indices for "off-diagonal" portion of matrix
5724 .   oj - column indices
5725 -   oa - matrix values
5726 
5727    Output Parameter:
5728 .   mat - the matrix
5729 
5730    Level: advanced
5731 
5732    Notes:
5733        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5734        must free the arrays once the matrix has been destroyed and not before.
5735 
5736        The i and j indices are 0 based
5737 
5738        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5739 
5740        This sets local rows and cannot be used to set off-processor values.
5741 
5742        You cannot later use MatSetValues() to change values in this matrix.
5743 
5744 .keywords: matrix, aij, compressed row, sparse, parallel
5745 
5746 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5747           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5748 @*/
5749 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5750                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5751 {
5752   PetscErrorCode ierr;
5753   Mat_MPIAIJ     *maij;
5754 
5755   PetscFunctionBegin;
5756   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5757   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5758   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5759   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5760   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5761   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5762   maij = (Mat_MPIAIJ*) (*mat)->data;
5763   maij->donotstash     = PETSC_TRUE;
5764   (*mat)->preallocated = PETSC_TRUE;
5765 
5766   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5767   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5768 
5769   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5770   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5771 
5772   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5774   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5775   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5776 
5777   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5778   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5779   PetscFunctionReturn(0);
5780 }
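/*
   Usage sketch (hedged, illustrative only).  Following the two MatCreateSeqAIJWithArrays() calls
   above, the diagonal-portion arrays (i,j,a) describe an m x n block whose j entries are local
   column indices, while the off-diagonal arrays (oi,oj,oa) describe an m x N block whose oj
   entries are global column indices.  All six arrays belong to the caller and must stay valid
   until the matrix is destroyed:

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ... use A ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
      ... only now may the caller free i,j,a,oi,oj,oa ...
*/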
5781 
5782 /*
5783     Special version for direct calls from Fortran
5784 */
5785 #include <petsc-private/fortranimpl.h>
5786 
5787 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5788 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5789 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5790 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5791 #endif
5792 
5793 /* Change these macros so they can be used in a void function */
5794 #undef CHKERRQ
5795 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5796 #undef SETERRQ2
5797 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5798 #undef SETERRQ3
5799 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5800 #undef SETERRQ
5801 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5802 
5803 EXTERN_C_BEGIN
5804 #undef __FUNCT__
5805 #define __FUNCT__ "matsetvaluesmpiaij_"
5806 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5807 {
5808   Mat             mat = *mmat;
5809   PetscInt        m = *mm, n = *mn;
5810   InsertMode      addv = *maddv;
5811   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5812   PetscScalar     value;
5813   PetscErrorCode  ierr;
5814 
5815   MatCheckPreallocated(mat,1);
5816   if (mat->insertmode == NOT_SET_VALUES) {
5817     mat->insertmode = addv;
5818   }
5819 #if defined(PETSC_USE_DEBUG)
5820   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5821 #endif
5822   {
5823   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5824   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5825   PetscBool       roworiented = aij->roworiented;
5826 
5827   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5828   Mat             A = aij->A;
5829   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5830   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5831   MatScalar       *aa = a->a;
5832   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5833   Mat             B = aij->B;
5834   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5835   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5836   MatScalar       *ba = b->a;
5837 
5838   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5839   PetscInt        nonew = a->nonew;
5840   MatScalar       *ap1,*ap2;
5841 
5842   PetscFunctionBegin;
5843   for (i=0; i<m; i++) {
5844     if (im[i] < 0) continue;
5845 #if defined(PETSC_USE_DEBUG)
5846     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5847 #endif
5848     if (im[i] >= rstart && im[i] < rend) {
5849       row      = im[i] - rstart;
5850       lastcol1 = -1;
5851       rp1      = aj + ai[row];
5852       ap1      = aa + ai[row];
5853       rmax1    = aimax[row];
5854       nrow1    = ailen[row];
5855       low1     = 0;
5856       high1    = nrow1;
5857       lastcol2 = -1;
5858       rp2      = bj + bi[row];
5859       ap2      = ba + bi[row];
5860       rmax2    = bimax[row];
5861       nrow2    = bilen[row];
5862       low2     = 0;
5863       high2    = nrow2;
5864 
5865       for (j=0; j<n; j++) {
5866         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5867         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5868         if (in[j] >= cstart && in[j] < cend) {
5869           col = in[j] - cstart;
5870           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5871         } else if (in[j] < 0) continue;
5872 #if defined(PETSC_USE_DEBUG)
5873         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5874 #endif
5875         else {
5876           if (mat->was_assembled) {
5877             if (!aij->colmap) {
5878               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5879             }
5880 #if defined (PETSC_USE_CTABLE)
5881             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5882             col--;
5883 #else
5884             col = aij->colmap[in[j]] - 1;
5885 #endif
5886             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5887               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5888               col =  in[j];
5889               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5890               B = aij->B;
5891               b = (Mat_SeqAIJ*)B->data;
5892               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5893               rp2      = bj + bi[row];
5894               ap2      = ba + bi[row];
5895               rmax2    = bimax[row];
5896               nrow2    = bilen[row];
5897               low2     = 0;
5898               high2    = nrow2;
5899               bm       = aij->B->rmap->n;
5900               ba = b->a;
5901             }
5902           } else col = in[j];
5903           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5904         }
5905       }
5906     } else {
5907       if (!aij->donotstash) {
5908         if (roworiented) {
5909           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5910         } else {
5911           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5912         }
5913       }
5914     }
5915   }}
5916   PetscFunctionReturnVoid();
5917 }
5918 EXTERN_C_END
5919 
5920