1 /*$Id: mpiaij.c,v 1.319 2000/08/30 20:58:56 bsmith Exp balay $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order N integer array), but access is fast.
20 */
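/*
   A sketch of how the colmap is consulted later (see MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below).  The names gcol/lcol are illustrative only; both
   storage schemes keep the local index shifted up by one so that 0 can mean
   "global column not present in the off-diagonal part":

#if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
#else
       lcol = aij->colmap[gcol] - 1;
#endif

   A result of lcol < 0 means the global column has no entries in this
   processor's off-diagonal block.
*/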
21 #undef __FUNC__
22 #define __FUNC__ /*<a name="CreateColmap_MPIAIJ_Private"></a>*/"CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(aij->n,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int*)PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
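/*
   The two macros below insert a single (row,col,value) entry directly into the
   local "diagonal" block A (MatSetValues_SeqAIJ_A_Private) or the "off-diagonal"
   block B (MatSetValues_SeqAIJ_B_Private).  Each narrows the search with a short
   binary search, then scans linearly for the column; if the column is new and the
   row is already full, the i/j/a arrays are reallocated with CHUNKSIZE extra
   entries for that row and the later entries are shifted up.  They duplicate the
   logic of MatSetValues_SeqAIJ() (see the commented-out calls further below),
   apparently to avoid the per-entry function-call overhead.
*/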
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col1) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<a->m+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col1) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<b->m+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
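/*
   MatSetValues_MPIAIJ() routes each entry (im[i],in[j]) to one of three places:
     - row owned locally and column in [cstart,cend): the diagonal block aij->A;
     - row owned locally, column owned elsewhere: the off-diagonal block aij->B,
       translating the global column through colmap if the matrix was assembled
       before (and disassembling if a brand new off-diagonal column appears);
     - row owned by another processor: the stash, communicated later during
       MatAssemblyBegin()/MatAssemblyEnd().

   A minimal, hypothetical caller sketch (values set this way may end up in any
   of the three places above):

     int    row = 0,cols[2] = {0,1};
     Scalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/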
193 #undef __FUNC__
194 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
198   Scalar     value;
199   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int        cstart = aij->cstart,cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
206   int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208   PetscTruth ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE:PETSC_FALSE);
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
211   int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         }
234         else if (in[j] < 0) continue;
235 #if defined(PETSC_USE_BOPT_g)
236         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
237 #endif
238         else {
239           if (mat->was_assembled) {
240             if (!aij->colmap) {
241               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
242             }
243 #if defined (PETSC_USE_CTABLE)
244             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
245 	    col--;
246 #else
247             col = aij->colmap[in[j]] - 1;
248 #endif
249             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
250               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
251               col =  in[j];
252               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
253               B = aij->B;
254               b = (Mat_SeqAIJ*)B->data;
255               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
256               ba = b->a;
257             }
258           } else col = in[j];
259           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
260           if (ignorezeroentries && value == 0.0) continue;
261           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         }
264       }
265     } else {
266       if (!aij->donotstash) {
267         if (roworiented) {
268           if (ignorezeroentries && v[i*n] == 0.0) continue;
269           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
270         } else {
271           if (ignorezeroentries && v[i] == 0.0) continue;
272           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
273         }
274       }
275     }
276   }
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNC__
281 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIAIJ"
282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
283 {
284   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
285   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
286   int        cstart = aij->cstart,cend = aij->cend,row,col;
287 
288   PetscFunctionBegin;
289   for (i=0; i<m; i++) {
290     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
291     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
292     if (idxm[i] >= rstart && idxm[i] < rend) {
293       row = idxm[i] - rstart;
294       for (j=0; j<n; j++) {
295         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
296         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
297         if (idxn[j] >= cstart && idxn[j] < cend){
298           col = idxn[j] - cstart;
299           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
300         } else {
301           if (!aij->colmap) {
302             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
303           }
304 #if defined (PETSC_USE_CTABLE)
305           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
306           col --;
307 #else
308           col = aij->colmap[idxn[j]] - 1;
309 #endif
310           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311           else {
312             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
313           }
314         }
315       }
316     } else {
317       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
318     }
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNC__
324 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIAIJ"
325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
326 {
327   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
328   int         ierr,nstash,reallocs;
329   InsertMode  addv;
330 
331   PetscFunctionBegin;
332   if (aij->donotstash) {
333     PetscFunctionReturn(0);
334   }
335 
336   /* make sure all processors are in the same mode (all INSERT_VALUES or all ADD_VALUES) */
337   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
338   if (addv == (ADD_VALUES|INSERT_VALUES)) {
339     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
340   }
341   mat->insertmode = addv; /* in case this processor had no cache */
342 
343   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
344   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
345   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
346   PetscFunctionReturn(0);
347 }
348 
349 
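/*
   MatAssemblyEnd_MPIAIJ() drains the stash (each incoming message is unpacked and
   fed back through MatSetValues_MPIAIJ() one run of same-row values at a time),
   assembles the diagonal block, then uses an MPI_Allreduce with MPI_PROD over
   was_assembled to detect whether any processor disassembled its off-diagonal
   block (skipped when B cannot receive new nonzeros, since then no processor
   could have disassembled).  On the first final assembly the scatter context for
   the ghost values is built by MatSetUpMultiply_MPIAIJ(), and finally the
   off-diagonal block is assembled.
*/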
350 #undef __FUNC__
351 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIAIJ"
352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
353 {
354   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
355   int         i,j,rstart,ncols,n,ierr,flg;
356   int         *row,*col,other_disassembled;
357   Scalar      *val;
358   InsertMode  addv = mat->insertmode;
359 
360   PetscFunctionBegin;
361   if (!aij->donotstash) {
362     while (1) {
363       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
364       if (!flg) break;
365 
366       for (i=0; i<n;) {
367         /* Now identify the consecutive vals belonging to the same row */
368         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
369         if (j < n) ncols = j-i;
370         else       ncols = n-i;
371         /* Now assemble all these values with a single function call */
372         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
373         i = j;
374       }
375     }
376     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
377   }
378 
379   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
380   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
381 
382   /* determine if any processor has disassembled; if so we must
383      also disassemble ourselves, in order that we may reassemble. */
384   /*
385      if the nonzero structure of submatrix B cannot change then we know that
386      no processor disassembled and thus we can skip this stuff
387   */
388   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
389     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
390     if (mat->was_assembled && !other_disassembled) {
391       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
392     }
393   }
394 
395   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
396     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
397   }
398   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
399   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
400 
401   if (aij->rowvalues) {
402     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
403     aij->rowvalues = 0;
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNC__
409 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIAIJ"
410 int MatZeroEntries_MPIAIJ(Mat A)
411 {
412   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413   int        ierr;
414 
415   PetscFunctionBegin;
416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
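/*
   MatZeroRows_MPIAIJ() accepts row indices that may be owned by any processor.
   The communication pattern below: count how many of our indices belong to each
   owner, use an MPI_Allreduce with PetscMaxSum_Op so every processor learns how
   many messages it will receive and the maximum message length, post the
   receives, send each owner its rows, and collect the rows we own into lrows.
   The actual zeroing is then done locally on the two sequential blocks, with
   special handling to set the diagonal when requested.
*/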
421 #undef __FUNC__
422 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIAIJ"
423 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
426   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
427   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
428   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
429   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
430   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
431   MPI_Comm       comm = A->comm;
432   MPI_Request    *send_waits,*recv_waits;
433   MPI_Status     recv_status,*send_status;
434   IS             istmp;
435 
436   PetscFunctionBegin;
437   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
438   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
439 
440   /*  first count number of contributors to each processor */
441   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
442   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
443   procs  = nprocs + size;
444   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
445   for (i=0; i<N; i++) {
446     idx = rows[i];
447     found = 0;
448     for (j=0; j<size; j++) {
449       if (idx >= owners[j] && idx < owners[j+1]) {
450         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
451       }
452     }
453     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
454   }
455   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
456 
457   /* inform other processors of number of messages and max length*/
458   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
459   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
460   nrecvs = work[size+rank];
461   nmax   = work[rank];
462   ierr   = PetscFree(work);CHKERRQ(ierr);
463 
464   /* post receives:   */
465   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
466   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
467   for (i=0; i<nrecvs; i++) {
468     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
469   }
470 
471   /* do sends:
472       1) starts[i] gives the starting index in svalues for stuff going to
473          the ith processor
474   */
475   svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
476   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
477   starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
478   starts[0] = 0;
479   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
480   for (i=0; i<N; i++) {
481     svalues[starts[owner[i]]++] = rows[i];
482   }
483   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
484 
485   starts[0] = 0;
486   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   count = 0;
488   for (i=0; i<size; i++) {
489     if (procs[i]) {
490       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
491     }
492   }
493   ierr = PetscFree(starts);CHKERRQ(ierr);
494 
495   base = owners[rank];
496 
497   /*  wait on receives */
498   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
499   source = lens + nrecvs;
500   count  = nrecvs; slen = 0;
501   while (count) {
502     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
503     /* unpack receives into our local space */
504     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
505     source[imdex]  = recv_status.MPI_SOURCE;
506     lens[imdex]    = n;
507     slen          += n;
508     count--;
509   }
510   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
511 
512   /* move the data into the send scatter */
513   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
514   count = 0;
515   for (i=0; i<nrecvs; i++) {
516     values = rvalues + i*nmax;
517     for (j=0; j<lens[i]; j++) {
518       lrows[count++] = values[j] - base;
519     }
520   }
521   ierr = PetscFree(rvalues);CHKERRQ(ierr);
522   ierr = PetscFree(lens);CHKERRQ(ierr);
523   ierr = PetscFree(owner);CHKERRQ(ierr);
524   ierr = PetscFree(nprocs);CHKERRQ(ierr);
525 
526   /* actually zap the local rows */
527   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
528   PLogObjectParent(A,istmp);
529 
530   /*
531         Zero the required rows. If the "diagonal block" of the matrix
532      is square and the user wishes to set the diagonal we use separate
533      code so that MatSetValues() is not called for each diagonal entry,
534      which would allocate new memory and thus cause lots of mallocs, slowing things down.
535 
536        Contributed by: Mathew Knepley
537   */
538   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
539   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
540   if (diag && (l->A->M == l->A->N)) {
541     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
542   } else if (diag) {
543     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
544     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
545       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
547     }
548     for (i = 0; i < slen; i++) {
549       row  = lrows[i] + rstart;
550       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
551     }
552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554   } else {
555     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
556   }
557   ierr = ISDestroy(istmp);CHKERRQ(ierr);
558   ierr = PetscFree(lrows);CHKERRQ(ierr);
559 
560   /* wait on sends */
561   if (nsends) {
562     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
563     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
564     ierr        = PetscFree(send_status);CHKERRQ(ierr);
565   }
566   ierr = PetscFree(send_waits);CHKERRQ(ierr);
567   ierr = PetscFree(svalues);CHKERRQ(ierr);
568 
569   PetscFunctionReturn(0);
570 }
571 
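/*
   Matrix-vector product for the MPIAIJ layout: y = A*x_local + B*x_ghost, where A
   holds the columns owned by this processor and B the remaining (compressed)
   columns.  The scatter of the ghost values into a->lvec is started before the
   local multiply and finished just before the off-diagonal multiply-add, so the
   communication overlaps the computation on the diagonal block.
*/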
572 #undef __FUNC__
573 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIAIJ"
574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
575 {
576   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
577   int        ierr,nt;
578 
579   PetscFunctionBegin;
580   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
581   if (nt != a->n) {
582     SETERRQ2(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A (%d) and xx (%d)",a->n,nt);
583   }
584   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
586   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNC__
592 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIAIJ"
593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
594 {
595   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
596   int        ierr;
597 
598   PetscFunctionBegin;
599   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
601   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 #undef __FUNC__
607 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIAIJ"
608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
609 {
610   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
611   int        ierr;
612 
613   PetscFunctionBegin;
614   /* do nondiagonal part */
615   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
616   /* send it on its way */
617   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
618   /* do local part */
619   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
620   /* receive remote parts: note this assumes the values are not actually */
621   /* inserted in yy until the next line, which is true for my implementation */
622   /* but may not always be true. */
623   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNC__
628 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIAIJ"
629 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
630 {
631   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
632   int        ierr;
633 
634   PetscFunctionBegin;
635   /* do nondiagonal part */
636   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
637   /* send it on its way */
638   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
639   /* do local part */
640   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
641   /* receive remote parts: note this assumes the values are not actually */
642   /* inserted in zz until the next line, which is true for my implementation */
643   /* but may not always be true. */
644   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
645   PetscFunctionReturn(0);
646 }
647 
648 /*
649   This only works correctly for square matrices where the subblock A->A is the
650    diagonal block
651 */
652 #undef __FUNC__
653 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIAIJ"
654 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
655 {
656   int        ierr;
657   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
658 
659   PetscFunctionBegin;
660   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
661   if (a->rstart != a->cstart || a->rend != a->cend) {
662     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
663   }
664   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNC__
669 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIAIJ"
670 int MatScale_MPIAIJ(Scalar *aa,Mat A)
671 {
672   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
673   int        ierr;
674 
675   PetscFunctionBegin;
676   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
677   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNC__
682 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIAIJ"
683 int MatDestroy_MPIAIJ(Mat mat)
684 {
685   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
686   int        ierr;
687 
688   PetscFunctionBegin;
689 
690   if (mat->mapping) {
691     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
692   }
693   if (mat->bmapping) {
694     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
695   }
696   if (mat->rmap) {
697     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
698   }
699   if (mat->cmap) {
700     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
701   }
702 #if defined(PETSC_USE_LOG)
703   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
704 #endif
705   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
706   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
707   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
708   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
709 #if defined (PETSC_USE_CTABLE)
710   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
711 #else
712   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
713 #endif
714   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
715   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
716   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
717   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
718   ierr = PetscFree(aij);CHKERRQ(ierr);
719   PLogObjectDestroy(mat);
720   PetscHeaderDestroy(mat);
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNC__
725 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ_ASCIIorDraworSocket"
726 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
727 {
728   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
729   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
730   int         ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size;
731   PetscTruth  isdraw,isascii,flg;
732   Viewer      sviewer;
733 
734   PetscFunctionBegin;
735   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
736   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
737   if (isascii) {
738     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
739     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
740       MatInfo info;
741       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
742       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
743       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
744       if (flg) {
745         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
746 					      rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
747       } else {
748         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
749 		    rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
750       }
751       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
752       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
753       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
754       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
755       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
756       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
757       PetscFunctionReturn(0);
758     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
759       PetscFunctionReturn(0);
760     }
761   } else if (isdraw) {
762     Draw       draw;
763     PetscTruth isnull;
764     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
765     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
766   }
767 
768   if (size == 1) {
769     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
770   } else {
771     /* assemble the entire matrix onto first processor. */
772     Mat         A;
773     Mat_SeqAIJ *Aloc;
774     int         M = aij->M,N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
775     Scalar      *a;
776 
777     if (!rank) {
778       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
779     } else {
780       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
781     }
782     PLogObjectParent(mat,A);
783 
784     /* copy over the A part */
785     Aloc = (Mat_SeqAIJ*)aij->A->data;
786     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
787     row = aij->rstart;
788     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
789     for (i=0; i<m; i++) {
790       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
791       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
792     }
793     aj = Aloc->j;
794     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
795 
796     /* copy over the B part */
797     Aloc = (Mat_SeqAIJ*)aij->B->data;
798     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
799     row = aij->rstart;
800     ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols);
801     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
802     for (i=0; i<m; i++) {
803       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
804       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
805     }
806     ierr = PetscFree(ct);CHKERRQ(ierr);
807     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
808     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
809     /*
810        Everyone has to participate in the call that draws the matrix since the graphics
811        waits are synchronized across all processors that share the Draw object
812     */
813     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
814     if (!rank) {
815       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
816     }
817     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
818     ierr = MatDestroy(A);CHKERRQ(ierr);
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNC__
824 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ"
825 int MatView_MPIAIJ(Mat mat,Viewer viewer)
826 {
827   int        ierr;
828   PetscTruth isascii,isdraw,issocket,isbinary;
829 
830   PetscFunctionBegin;
831   ierr  = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
832   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
833   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
834   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
835   if (isascii || isdraw || isbinary || issocket) {
836     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
837   } else {
838     SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
839   }
840   PetscFunctionReturn(0);
841 }
842 
843 /*
844     This has to provide several versions.
845 
846      2) a) use only local smoothing, updating outer values only once.
847         b) local smoothing, updating outer values each inner iteration
848      3) coloring, updating outer values between colors.
849 */
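/*
   Only the SOR_LOCAL_* variants are implemented below: the ghost values are
   scattered into mat->lvec once per call and held fixed while the inner sweeps
   run, i.e. processor-local SOR (block Jacobi between processors).  True
   parallel SOR falls through to the error at the bottom of the routine.
*/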
850 #undef __FUNC__
851 #define __FUNC__ /*<a name=""></a>*/"MatRelax_MPIAIJ"
852 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,
853                            PetscReal fshift,int its,Vec xx)
854 {
855   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
856   Mat        AA = mat->A,BB = mat->B;
857   Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
858   Scalar     *b,*x,*xs,*ls,d,*v,sum;
859   int        ierr,*idx,*diag;
860   int        n = mat->n,m = mat->m,i,shift = A->indexshift;
861 
862   PetscFunctionBegin;
863   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
864   diag = A->diag;
865   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
866     if (flag & SOR_ZERO_INITIAL_GUESS) {
867       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
868       PetscFunctionReturn(0);
869     }
870     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
871     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
872     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
873     if (xx != bb) {
874       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
875     } else {
876       b = x;
877     }
878     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
879     xs = x + shift; /* shift by one for index start of 1 */
880     ls = ls + shift;
881     while (its--) {
882       /* go down through the rows */
883       for (i=0; i<m; i++) {
884         n    = A->i[i+1] - A->i[i];
885 	PLogFlops(4*n+3);
886         idx  = A->j + A->i[i] + shift;
887         v    = A->a + A->i[i] + shift;
888         sum  = b[i];
889         SPARSEDENSEMDOT(sum,xs,v,idx,n);
890         d    = fshift + A->a[diag[i]+shift];
891         n    = B->i[i+1] - B->i[i];
892         idx  = B->j + B->i[i] + shift;
893         v    = B->a + B->i[i] + shift;
894         SPARSEDENSEMDOT(sum,ls,v,idx,n);
895         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
896       }
897       /* come up through the rows */
898       for (i=m-1; i>-1; i--) {
899         n    = A->i[i+1] - A->i[i];
900 	PLogFlops(4*n+3);
901         idx  = A->j + A->i[i] + shift;
902         v    = A->a + A->i[i] + shift;
903         sum  = b[i];
904         SPARSEDENSEMDOT(sum,xs,v,idx,n);
905         d    = fshift + A->a[diag[i]+shift];
906         n    = B->i[i+1] - B->i[i];
907         idx  = B->j + B->i[i] + shift;
908         v    = B->a + B->i[i] + shift;
909         SPARSEDENSEMDOT(sum,ls,v,idx,n);
910         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
911       }
912     }
913     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
914     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
915     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
916   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
917     if (flag & SOR_ZERO_INITIAL_GUESS) {
918       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
919       PetscFunctionReturn(0);
920     }
921     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
922     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
923     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
924     if (xx != bb) {
925       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
926     } else {
927       b = x;
928     }
929     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
930     xs = x + shift; /* shift by one for index start of 1 */
931     ls = ls + shift;
932     while (its--) {
933       for (i=0; i<m; i++) {
934         n    = A->i[i+1] - A->i[i];
935 	PLogFlops(4*n+3);
936         idx  = A->j + A->i[i] + shift;
937         v    = A->a + A->i[i] + shift;
938         sum  = b[i];
939         SPARSEDENSEMDOT(sum,xs,v,idx,n);
940         d    = fshift + A->a[diag[i]+shift];
941         n    = B->i[i+1] - B->i[i];
942         idx  = B->j + B->i[i] + shift;
943         v    = B->a + B->i[i] + shift;
944         SPARSEDENSEMDOT(sum,ls,v,idx,n);
945         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
946       }
947     }
948     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
949     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
950     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
951   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
952     if (flag & SOR_ZERO_INITIAL_GUESS) {
953       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
954       PetscFunctionReturn(0);
955     }
956     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
957     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
958     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
959     if (xx != bb) {
960       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
961     } else {
962       b = x;
963     }
964     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
965     xs = x + shift; /* shift by one for index start of 1 */
966     ls = ls + shift;
967     while (its--) {
968       for (i=m-1; i>-1; i--) {
969         n    = A->i[i+1] - A->i[i];
970 	PLogFlops(4*n+3);
971         idx  = A->j + A->i[i] + shift;
972         v    = A->a + A->i[i] + shift;
973         sum  = b[i];
974         SPARSEDENSEMDOT(sum,xs,v,idx,n);
975         d    = fshift + A->a[diag[i]+shift];
976         n    = B->i[i+1] - B->i[i];
977         idx  = B->j + B->i[i] + shift;
978         v    = B->a + B->i[i] + shift;
979         SPARSEDENSEMDOT(sum,ls,v,idx,n);
980         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
981       }
982     }
983     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
984     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
985     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
986   } else {
987     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNC__
993 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIAIJ"
994 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
995 {
996   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
997   Mat        A = mat->A,B = mat->B;
998   int        ierr;
999   PetscReal  isend[5],irecv[5];
1000 
1001   PetscFunctionBegin;
1002   info->block_size     = 1.0;
1003   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1004   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1005   isend[3] = info->memory;  isend[4] = info->mallocs;
1006   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1007   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1008   isend[3] += info->memory;  isend[4] += info->mallocs;
1009   if (flag == MAT_LOCAL) {
1010     info->nz_used      = isend[0];
1011     info->nz_allocated = isend[1];
1012     info->nz_unneeded  = isend[2];
1013     info->memory       = isend[3];
1014     info->mallocs      = isend[4];
1015   } else if (flag == MAT_GLOBAL_MAX) {
1016     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1017     info->nz_used      = irecv[0];
1018     info->nz_allocated = irecv[1];
1019     info->nz_unneeded  = irecv[2];
1020     info->memory       = irecv[3];
1021     info->mallocs      = irecv[4];
1022   } else if (flag == MAT_GLOBAL_SUM) {
1023     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1024     info->nz_used      = irecv[0];
1025     info->nz_allocated = irecv[1];
1026     info->nz_unneeded  = irecv[2];
1027     info->memory       = irecv[3];
1028     info->mallocs      = irecv[4];
1029   }
1030   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1031   info->fill_ratio_needed = 0;
1032   info->factor_mallocs    = 0;
1033   info->rows_global       = (double)mat->M;
1034   info->columns_global    = (double)mat->N;
1035   info->rows_local        = (double)mat->m;
1036   info->columns_local     = (double)mat->n;
1037 
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 #undef __FUNC__
1042 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIAIJ"
1043 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1044 {
1045   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1046   int        ierr;
1047 
1048   PetscFunctionBegin;
1049   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1050       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1051       op == MAT_COLUMNS_UNSORTED ||
1052       op == MAT_COLUMNS_SORTED ||
1053       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1054       op == MAT_KEEP_ZEROED_ROWS ||
1055       op == MAT_NEW_NONZERO_LOCATION_ERR ||
1056       op == MAT_USE_INODES ||
1057       op == MAT_DO_NOT_USE_INODES ||
1058       op == MAT_IGNORE_ZERO_ENTRIES) {
1059         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1060         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1061   } else if (op == MAT_ROW_ORIENTED) {
1062     a->roworiented = PETSC_TRUE;
1063     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1064     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1065   } else if (op == MAT_ROWS_SORTED ||
1066              op == MAT_ROWS_UNSORTED ||
1067              op == MAT_SYMMETRIC ||
1068              op == MAT_STRUCTURALLY_SYMMETRIC ||
1069              op == MAT_YES_NEW_DIAGONALS) {
1070     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1071   } else if (op == MAT_COLUMN_ORIENTED) {
1072     a->roworiented = PETSC_FALSE;
1073     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1074     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1075   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1076     a->donotstash = PETSC_TRUE;
1077   } else if (op == MAT_NO_NEW_DIAGONALS){
1078     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1081   }
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNC__
1086 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIAIJ"
1087 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1088 {
1089   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1090 
1091   PetscFunctionBegin;
1092   if (m) *m = mat->M;
1093   if (n) *n = mat->N;
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNC__
1098 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIAIJ"
1099 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1100 {
1101   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1102 
1103   PetscFunctionBegin;
1104   if (m) *m = mat->m;
1105   if (n) *n = mat->n;
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNC__
1110 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIAIJ"
1111 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1112 {
1113   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1114 
1115   PetscFunctionBegin;
1116   if (m) *m = mat->rstart;
1117   if (n) *n = mat->rend;
1118   PetscFunctionReturn(0);
1119 }
1120 
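/*
   MatGetRow_MPIAIJ() returns a locally owned row with global column indices in
   increasing order.  It gets the row from both sequential blocks, then merges
   them: off-diagonal entries whose global column (looked up through garray) lies
   below cstart come first, then the diagonal-block entries (offset by cstart),
   then the remaining off-diagonal entries.  Work space big enough for the longest
   local row is allocated lazily on the first call that requests indices or values.
*/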
1121 #undef __FUNC__
1122 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIAIJ"
1123 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1124 {
1125   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1126   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1127   int        i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1128   int        nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1129   int        *cmap,*idx_p;
1130 
1131   PetscFunctionBegin;
1132   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1133   mat->getrowactive = PETSC_TRUE;
1134 
1135   if (!mat->rowvalues && (idx || v)) {
1136     /*
1137         allocate enough space to hold information from the longest row.
1138     */
1139     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1140     int     max = 1,m = mat->m,tmp;
1141     for (i=0; i<m; i++) {
1142       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1143       if (max < tmp) { max = tmp; }
1144     }
1145     mat->rowvalues  = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1146     mat->rowindices = (int*)(mat->rowvalues + max);
1147   }
1148 
1149   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1150   lrow = row - rstart;
1151 
1152   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1153   if (!v)   {pvA = 0; pvB = 0;}
1154   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1155   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1156   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1157   nztot = nzA + nzB;
1158 
1159   cmap  = mat->garray;
1160   if (v  || idx) {
1161     if (nztot) {
1162       /* Sort by increasing column numbers, assuming A and B already sorted */
1163       int imark = -1;
1164       if (v) {
1165         *v = v_p = mat->rowvalues;
1166         for (i=0; i<nzB; i++) {
1167           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1168           else break;
1169         }
1170         imark = i;
1171         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1172         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1173       }
1174       if (idx) {
1175         *idx = idx_p = mat->rowindices;
1176         if (imark > -1) {
1177           for (i=0; i<imark; i++) {
1178             idx_p[i] = cmap[cworkB[i]];
1179           }
1180         } else {
1181           for (i=0; i<nzB; i++) {
1182             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1183             else break;
1184           }
1185           imark = i;
1186         }
1187         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1188         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1189       }
1190     } else {
1191       if (idx) *idx = 0;
1192       if (v)   *v   = 0;
1193     }
1194   }
1195   *nz = nztot;
1196   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1197   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNC__
1202 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIAIJ"
1203 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1204 {
1205   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1206 
1207   PetscFunctionBegin;
1208   if (aij->getrowactive == PETSC_FALSE) {
1209     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1210   }
1211   aij->getrowactive = PETSC_FALSE;
1212   PetscFunctionReturn(0);
1213 }
1214 
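/*
   MatNorm_MPIAIJ() computes:
     - NORM_FROBENIUS: sum of |a_ij|^2 over both local blocks, an MPI_SUM reduce,
       then a square root;
     - NORM_1 (max column sum): each processor accumulates |a_ij| into a length-N
       array indexed by global column (using garray for the off-diagonal block),
       the arrays are summed across processors, and the maximum entry is taken;
     - NORM_INFINITY (max row sum): each processor takes the maximum of its local
       row sums and an MPI_MAX reduce finishes the job.
   The length-N work arrays make the 1-norm non-scalable in memory.
*/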
1215 #undef __FUNC__
1216 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIAIJ"
1217 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1218 {
1219   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1220   Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1221   int        ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1222   PetscReal  sum = 0.0;
1223   Scalar     *v;
1224 
1225   PetscFunctionBegin;
1226   if (aij->size == 1) {
1227     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1228   } else {
1229     if (type == NORM_FROBENIUS) {
1230       v = amat->a;
1231       for (i=0; i<amat->nz; i++) {
1232 #if defined(PETSC_USE_COMPLEX)
1233         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1234 #else
1235         sum += (*v)*(*v); v++;
1236 #endif
1237       }
1238       v = bmat->a;
1239       for (i=0; i<bmat->nz; i++) {
1240 #if defined(PETSC_USE_COMPLEX)
1241         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1242 #else
1243         sum += (*v)*(*v); v++;
1244 #endif
1245       }
1246       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1247       *norm = sqrt(*norm);
1248     } else if (type == NORM_1) { /* max column norm */
1249       PetscReal *tmp,*tmp2;
1250       int    *jj,*garray = aij->garray;
1251       tmp  = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp);
1252       tmp2 = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2);
1253       ierr = PetscMemzero(tmp,aij->N*sizeof(PetscReal));CHKERRQ(ierr);
1254       *norm = 0.0;
1255       v = amat->a; jj = amat->j;
1256       for (j=0; j<amat->nz; j++) {
1257         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1258       }
1259       v = bmat->a; jj = bmat->j;
1260       for (j=0; j<bmat->nz; j++) {
1261         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1262       }
1263       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1264       for (j=0; j<aij->N; j++) {
1265         if (tmp2[j] > *norm) *norm = tmp2[j];
1266       }
1267       ierr = PetscFree(tmp);CHKERRQ(ierr);
1268       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1269     } else if (type == NORM_INFINITY) { /* max row norm */
1270       PetscReal ntemp = 0.0;
1271       for (j=0; j<amat->m; j++) {
1272         v = amat->a + amat->i[j] + shift;
1273         sum = 0.0;
1274         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1275           sum += PetscAbsScalar(*v); v++;
1276         }
1277         v = bmat->a + bmat->i[j] + shift;
1278         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1279           sum += PetscAbsScalar(*v); v++;
1280         }
1281         if (sum > ntemp) ntemp = sum;
1282       }
1283       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1284     } else {
1285       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1286     }
1287   }
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNC__
1292 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIAIJ"
1293 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1294 {
1295   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1296   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1297   int        ierr,shift = Aloc->indexshift;
1298   int        M = a->M,N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1299   Mat        B;
1300   Scalar     *array;
1301 
1302   PetscFunctionBegin;
1303   if (!matout && M != N) {
1304     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1305   }
1306 
1307   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1308 
1309   /* copy over the A part */
1310   Aloc = (Mat_SeqAIJ*)a->A->data;
1311   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1312   row = a->rstart;
1313   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1314   for (i=0; i<m; i++) {
1315     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1316     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1317   }
1318   aj = Aloc->j;
1319   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1320 
1321   /* copy over the B part */
1322   Aloc = (Mat_SeqAIJ*)a->B->data;
1323   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1324   row = a->rstart;
1325   ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols);
1326   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1327   for (i=0; i<m; i++) {
1328     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1329     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1330   }
1331   ierr = PetscFree(ct);CHKERRQ(ierr);
1332   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1333   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1334   if (matout) {
1335     *matout = B;
1336   } else {
1337     PetscOps       *Abops;
1338     struct _MatOps *Aops;
1339 
1340     /* This isn't really an in-place transpose ... but we free the data structures from a */
1341     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1342     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1343     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1344 #if defined (PETSC_USE_CTABLE)
1345     if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
1346 #else
1347     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1348 #endif
1349     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1350     if (a->lvec)   {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
1351     if (a->Mvctx)  {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
1352     ierr = PetscFree(a);CHKERRQ(ierr);
1353 
1354     /*
1355        This is horrible, horrible code. We need to keep the
1356       A pointers for the bops and ops but copy everything
1357       else from B.
1358     */
1359     Abops   = A->bops;
1360     Aops    = A->ops;
1361     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1362     A->bops = Abops;
1363     A->ops  = Aops;
1364     PetscHeaderDestroy(B);
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
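/*
   MatDiagonalScale_MPIAIJ(): left scaling by ll only needs locally owned rows, so
   it is applied directly to both blocks; right scaling by rr also needs the ghost
   entries of rr for the off-diagonal block, so rr is scattered into aij->lvec and
   the scatter is overlapped with the scaling of the diagonal block.
*/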
1369 #undef __FUNC__
1370 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIAIJ"
1371 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1372 {
1373   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1374   Mat        a = aij->A,b = aij->B;
1375   int        ierr,s1,s2,s3;
1376 
1377   PetscFunctionBegin;
1378   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1379   if (rr) {
1380     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1381     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1382     /* Overlap communication with computation. */
1383     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1384   }
1385   if (ll) {
1386     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1387     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1388     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1389   }
1390   /* scale the diagonal block */
1391   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1392 
1393   if (rr) {
1394     /* Do a scatter end and then right scale the off-diagonal block */
1395     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1396     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1397   }
1398 
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 
1403 #undef __FUNC__
1404 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIAIJ"
1405 int MatPrintHelp_MPIAIJ(Mat A)
1406 {
1407   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1408   int        ierr;
1409 
1410   PetscFunctionBegin;
1411   if (!a->rank) {
1412     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1413   }
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNC__
1418 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIAIJ"
1419 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1420 {
1421   PetscFunctionBegin;
1422   *bs = 1;
1423   PetscFunctionReturn(0);
1424 }
1425 #undef __FUNC__
1426 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIAIJ"
1427 int MatSetUnfactored_MPIAIJ(Mat A)
1428 {
1429   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1430   int        ierr;
1431 
1432   PetscFunctionBegin;
1433   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNC__
1438 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIAIJ"
1439 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1440 {
1441   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1442   Mat        a,b,c,d;
1443   PetscTruth flg;
1444   int        ierr;
1445 
1446   PetscFunctionBegin;
1447   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1448   a = matA->A; b = matA->B;
1449   c = matB->A; d = matB->B;
1450 
1451   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1452   if (flg == PETSC_TRUE) {
1453     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1454   }
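  /* the matrices are equal only if both the diagonal and off-diagonal blocks match on every process */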
1455   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNC__
1460 #define __FUNC__ /*<a name=""></a>*/"MatCopy_MPIAIJ"
1461 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1462 {
1463   int        ierr;
1464   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1465   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1466 
1467   PetscFunctionBegin;
1468   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1469     /* because of the column compression in the off-processor part of the matrix a->B,
1470        the number of columns in a->B and b->B may be different, hence we cannot call
1471        MatCopy() directly on the two parts. If need be, a copy more efficient than
1472        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1473        then copying the submatrices */
1474     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1475   } else {
1476     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1477     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1483 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1484 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1485 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1486 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1487 
1488 /* -------------------------------------------------------------------*/
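/*
   Function table for the MATMPIAIJ matrix type; a zero entry means the
   corresponding matrix operation is not provided by this implementation.
*/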
1489 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1490        MatGetRow_MPIAIJ,
1491        MatRestoreRow_MPIAIJ,
1492        MatMult_MPIAIJ,
1493        MatMultAdd_MPIAIJ,
1494        MatMultTranspose_MPIAIJ,
1495        MatMultTransposeAdd_MPIAIJ,
1496        0,
1497        0,
1498        0,
1499        0,
1500        0,
1501        0,
1502        MatRelax_MPIAIJ,
1503        MatTranspose_MPIAIJ,
1504        MatGetInfo_MPIAIJ,
1505        MatEqual_MPIAIJ,
1506        MatGetDiagonal_MPIAIJ,
1507        MatDiagonalScale_MPIAIJ,
1508        MatNorm_MPIAIJ,
1509        MatAssemblyBegin_MPIAIJ,
1510        MatAssemblyEnd_MPIAIJ,
1511        0,
1512        MatSetOption_MPIAIJ,
1513        MatZeroEntries_MPIAIJ,
1514        MatZeroRows_MPIAIJ,
1515        0,
1516        0,
1517        0,
1518        0,
1519        MatGetSize_MPIAIJ,
1520        MatGetLocalSize_MPIAIJ,
1521        MatGetOwnershipRange_MPIAIJ,
1522        0,
1523        0,
1524        0,
1525        0,
1526        MatDuplicate_MPIAIJ,
1527        0,
1528        0,
1529        0,
1530        0,
1531        0,
1532        MatGetSubMatrices_MPIAIJ,
1533        MatIncreaseOverlap_MPIAIJ,
1534        MatGetValues_MPIAIJ,
1535        MatCopy_MPIAIJ,
1536        MatPrintHelp_MPIAIJ,
1537        MatScale_MPIAIJ,
1538        0,
1539        0,
1540        0,
1541        MatGetBlockSize_MPIAIJ,
1542        0,
1543        0,
1544        0,
1545        0,
1546        MatFDColoringCreate_MPIAIJ,
1547        0,
1548        MatSetUnfactored_MPIAIJ,
1549        0,
1550        0,
1551        MatGetSubMatrix_MPIAIJ,
1552        MatDestroy_MPIAIJ,
1553        MatView_MPIAIJ,
1554        MatGetMaps_Petsc};
1555 
1556 /* ----------------------------------------------------------------------------------------*/
1557 
1558 EXTERN_C_BEGIN
1559 #undef __FUNC__
1560 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIAIJ"
1561 int MatStoreValues_MPIAIJ(Mat mat)
1562 {
1563   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1564   int        ierr;
1565 
1566   PetscFunctionBegin;
1567   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1568   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 EXTERN_C_END
1572 
1573 EXTERN_C_BEGIN
1574 #undef __FUNC__
1575 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIAIJ"
1576 int MatRetrieveValues_MPIAIJ(Mat mat)
1577 {
1578   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1579   int        ierr;
1580 
1581   PetscFunctionBegin;
1582   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1583   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 EXTERN_C_END
1587 
1588 #include "petscpc.h"
1589 EXTERN_C_BEGIN
1590 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1591 EXTERN_C_END
1592 
1593 #undef __FUNC__
1594 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIAIJ"
1595 /*@C
1596    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1597    (the default parallel PETSc format).  For good matrix assembly performance
1598    the user should preallocate the matrix storage by setting the parameters
1599    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1600    performance can be increased by more than a factor of 50.
1601 
1602    Collective on MPI_Comm
1603 
1604    Input Parameters:
1605 +  comm - MPI communicator
1606 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1607            This value should be the same as the local size used in creating the
1608            y vector for the matrix-vector product y = Ax.
1609 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1610        This value should be the same as the local size used in creating the
1611        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1612 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1613 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1614 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1615            (same value is used for all local rows)
1616 .  d_nnz - array containing the number of nonzeros in the various rows of the
1617            DIAGONAL portion of the local submatrix (possibly different for each row)
1618            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1619            The size of this array is equal to the number of local rows, i.e 'm'.
1620            The size of this array is equal to the number of local rows, i.e. 'm'.
1621 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1622            submatrix (same value is used for all local rows).
1623 -  o_nnz - array containing the number of nonzeros in the various rows of the
1624            OFF-DIAGONAL portion of the local submatrix (possibly different for
1625            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1626            structure. The size of this array is equal to the number
1627            of local rows, i.e. 'm'.
1628 
1629    Output Parameter:
1630 .  A - the matrix
1631 
1632    Notes:
1633    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1634    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1635    storage requirements for this matrix.
1636 
1637    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1638    processor, then it must be used on all processors that share the object for
1639    that argument.
1640 
1641    The AIJ format (also called the Yale sparse matrix format or
1642    compressed row storage) is fully compatible with standard Fortran 77
1643    storage.  That is, the stored row and column indices can begin at
1644    either one (as in Fortran) or zero.  See the users manual for details.
1645 
1646    The user MUST specify either the local or global matrix dimensions
1647    (possibly both).
1648 
1649    The parallel matrix is partitioned such that the first m0 rows belong to
1650    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1651    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
1652 
1653    The DIAGONAL portion of the local submatrix of a processor can be defined
1654    as the submatrix which is obtained by extracting the part corresponding
1655    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1656    first row that belongs to the processor, and r2 is the last row belonging
1657    to this processor. This is a square mxm matrix. The remaining portion of
1658    the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1659 
1660    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1661 
1662    By default, this format uses inodes (identical nodes) when possible.
1663    We search for consecutive rows with the same nonzero structure, thereby
1664    reusing matrix information to achieve increased efficiency.
1665 
1666    Options Database Keys:
1667 +  -mat_aij_no_inode  - Do not use inodes
1668 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1669 .  -mat_aij_oneindex - Internally use indexing starting at 1
1670         rather than 0.  Note that when calling MatSetValues(),
1671         the user still MUST index entries starting at 0!
1672 .  -mat_mpiaij - use the parallel matrix data structures even on one processor
1673                (defaults to using SeqAIJ format on one processor)
1674 -  -mat_mpi - use the parallel matrix data structures even on one processor
1675                (defaults to using SeqAIJ format on one processor)
1676 
1677 
1678    Example usage:
1679 
1680    Consider the following 8x8 matrix with 34 non-zero values, that is
1681    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1682    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1683    as follows:
1684 
1685 .vb
1686             1  2  0  |  0  3  0  |  0  4
1687     Proc0   0  5  6  |  7  0  0  |  8  0
1688             9  0 10  | 11  0  0  | 12  0
1689     -------------------------------------
1690            13  0 14  | 15 16 17  |  0  0
1691     Proc1   0 18  0  | 19 20 21  |  0  0
1692             0  0  0  | 22 23  0  | 24  0
1693     -------------------------------------
1694     Proc2  25 26 27  |  0  0 28  | 29  0
1695            30  0  0  | 31 32 33  |  0 34
1696 .ve
1697 
1698    This can be represented as a collection of submatrices as:
1699 
1700 .vb
1701       A B C
1702       D E F
1703       G H I
1704 .ve
1705 
1706    Where the submatrices A,B,C are owned by proc0, D,E,F are
1707    owned by proc1, G,H,I are owned by proc2.
1708 
1709    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1710    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1711    The 'M','N' parameters are 8,8, and have the same values on all procs.
1712 
1713    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1714    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1715    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1716    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
1717    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1718    matrix, and [DF] as another SeqAIJ matrix.
1719 
1720    When the d_nz, o_nz parameters are specified, d_nz storage elements are
1721    allocated for every row of the local DIAGONAL submatrix, and o_nz
1722    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1723    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
1724    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
1725    In this case, the values of d_nz,o_nz are:
1726 .vb
1727      proc0 : d_nz = 2, o_nz = 2
1728      proc1 : d_nz = 3, o_nz = 2
1729      proc2 : d_nz = 1, o_nz = 4
1730 .ve
1731    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1732    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1733    for proc2, i.e. we are using 12+15+10=37 storage locations to store
1734    34 values.
1735 
1736    When d_nnz, o_nnz parameters are specified, the storage is specified
1737    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
1738    In the above case the values for d_nnz,o_nnz are:
1739 .vb
1740      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1741      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1742      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1743 .ve
1744    Here the space allocated is the sum of all the above values, i.e. 34, and
1745    hence the preallocation is perfect.
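
   As a sketch of how such a call might look (the Mat variable A, the error
   code ierr, and the communicator are assumed to be declared elsewhere),
   proc1 in the example above could create its share of the matrix with
.vb
     int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   with each of the other processes making the matching call using its own
   local sizes and nonzero counts.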
1746 
1747    Level: intermediate
1748 
1749 .keywords: matrix, aij, compressed row, sparse, parallel
1750 
1751 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1752 @*/
1753 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1754 {
1755   Mat          B;
1756   Mat_MPIAIJ   *b;
1757   int          ierr,i,size;
1758   PetscTruth   flag1,flag2;
1759 
1760   PetscFunctionBegin;
1761 
1762   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1763   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1764   if (d_nnz) {
1765     for (i=0; i<m; i++) {
1766       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1767     }
1768   }
1769   if (o_nnz) {
1770     for (i=0; i<m; i++) {
1771       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1772     }
1773   }
1774 
1775   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1776   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1777   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
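  /* on a single process, unless -mat_mpiaij or -mat_mpi was requested, create a sequential AIJ matrix instead */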
1778   if (!flag1 && !flag2 && size == 1) {
1779     if (M == PETSC_DECIDE) M = m;
1780     if (N == PETSC_DECIDE) N = n;
1781     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1782     PetscFunctionReturn(0);
1783   }
1784 
1785   *A = 0;
1786   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1787   PLogObjectCreate(B);
1788   B->data         = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1789   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1790   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1791   B->factor       = 0;
1792   B->assembled    = PETSC_FALSE;
1793   B->mapping      = 0;
1794 
1795   B->insertmode      = NOT_SET_VALUES;
1796   b->size            = size;
1797   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1798 
1799   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
1800     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1801   }
1802 
1803   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1804   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1805   b->m = m; B->m = m;
1806   b->n = n; B->n = n;
1807   b->N = N; B->N = N;
1808   b->M = M; B->M = M;
1809 
1810   /* the information in the maps duplicates the information computed below, eventually
1811      we should remove the duplicate information that is not contained in the maps */
1812   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
1813   ierr = MapCreateMPI(B->comm,n,N,&B->cmap);CHKERRQ(ierr);
1814 
1815   /* build local table of row and column ownerships */
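  /* rowners[i] will hold the first global row owned by process i (a prefix sum of
     the gathered local row counts); cowners is the analogous table for columns */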
1816   b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1817   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1818   b->cowners = b->rowners + b->size + 2;
1819   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1820   b->rowners[0] = 0;
1821   for (i=2; i<=b->size; i++) {
1822     b->rowners[i] += b->rowners[i-1];
1823   }
1824   b->rstart = b->rowners[b->rank];
1825   b->rend   = b->rowners[b->rank+1];
1826   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1827   b->cowners[0] = 0;
1828   for (i=2; i<=b->size; i++) {
1829     b->cowners[i] += b->cowners[i-1];
1830   }
1831   b->cstart = b->cowners[b->rank];
1832   b->cend   = b->cowners[b->rank+1];
1833 
1834   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1835   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1836   PLogObjectParent(B,b->A);
1837   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1838   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1839   PLogObjectParent(B,b->B);
1840 
1841   /* build cache for off array entries formed */
1842   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1843   b->donotstash  = PETSC_FALSE;
1844   b->colmap      = 0;
1845   b->garray      = 0;
1846   b->roworiented = PETSC_TRUE;
1847 
1848   /* stuff used for matrix vector multiply */
1849   b->lvec      = PETSC_NULL;
1850   b->Mvctx     = PETSC_NULL;
1851 
1852   /* stuff for MatGetRow() */
1853   b->rowindices   = 0;
1854   b->rowvalues    = 0;
1855   b->getrowactive = PETSC_FALSE;
1856 
1857   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1858                                      "MatStoreValues_MPIAIJ",
1859                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1860   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1861                                      "MatRetrieveValues_MPIAIJ",
1862                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1863   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1864                                      "MatGetDiagonalBlock_MPIAIJ",
1865                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1866   *A = B;
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #undef __FUNC__
1871 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIAIJ"
1872 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1873 {
1874   Mat        mat;
1875   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1876   int        ierr,len = 0;
1877   PetscTruth flg;
1878 
1879   PetscFunctionBegin;
1880   *newmat       = 0;
1881   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1882   PLogObjectCreate(mat);
1883   mat->data         = (void*)(a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1884   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1885   mat->factor       = matin->factor;
1886   mat->assembled    = PETSC_TRUE;
1887   mat->insertmode   = NOT_SET_VALUES;
1888 
1889   a->m = mat->m   = oldmat->m;
1890   a->n = mat->n   = oldmat->n;
1891   a->M = mat->M   = oldmat->M;
1892   a->N = mat->N   = oldmat->N;
1893 
1894   a->rstart       = oldmat->rstart;
1895   a->rend         = oldmat->rend;
1896   a->cstart       = oldmat->cstart;
1897   a->cend         = oldmat->cend;
1898   a->size         = oldmat->size;
1899   a->rank         = oldmat->rank;
1900   a->donotstash   = oldmat->donotstash;
1901   a->roworiented  = oldmat->roworiented;
1902   a->rowindices   = 0;
1903   a->rowvalues    = 0;
1904   a->getrowactive = PETSC_FALSE;
1905 
1906   a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1907   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1908   a->cowners = a->rowners + a->size + 2;
1909   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1910   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1911   if (oldmat->colmap) {
1912 #if defined (PETSC_USE_CTABLE)
1913     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1914 #else
1915     a->colmap = (int*)PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1916     PLogObjectMemory(mat,(a->N)*sizeof(int));
1917     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1918 #endif
1919   } else a->colmap = 0;
1920   if (oldmat->garray) {
1921     len = ((Mat_SeqAIJ*)(oldmat->B->data))->n;
1922     a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1923     PLogObjectMemory(mat,len*sizeof(int));
1924     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1925   } else a->garray = 0;
1926 
1927   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1928   PLogObjectParent(mat,a->lvec);
1929   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1930   PLogObjectParent(mat,a->Mvctx);
1931   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1932   PLogObjectParent(mat,a->A);
1933   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1934   PLogObjectParent(mat,a->B);
1935   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1936   if (flg) {
1937     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1938   }
1939   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1940   *newmat = mat;
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #include "petscsys.h"
1945 
1946 #undef __FUNC__
1947 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIAIJ"
1948 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1949 {
1950   Mat          A;
1951   Scalar       *vals,*svals;
1952   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1953   MPI_Status   status;
1954   int          i,nz,ierr,j,rstart,rend,fd;
1955   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1956   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1957   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1958 
1959   PetscFunctionBegin;
1960   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1961   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1962   if (!rank) {
1963     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1964     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1965     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1966     if (header[3] < 0) {
1967       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1968     }
1969   }
1970 
1971   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1972   M = header[1]; N = header[2];
1973   /* determine ownership of all rows */
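  /* each process gets M/size rows; the first M%size processes get one extra */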
1974   m = M/size + ((M % size) > rank);
1975   rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1976   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1977   rowners[0] = 0;
1978   for (i=2; i<=size; i++) {
1979     rowners[i] += rowners[i-1];
1980   }
1981   rstart = rowners[rank];
1982   rend   = rowners[rank+1];
1983 
1984   /* distribute row lengths to all processors */
1985   ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
1986   offlens = ourlens + (rend-rstart);
1987   if (!rank) {
1988     rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
1989     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1990     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
1991     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1992     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1993     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1994   } else {
1995     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1996   }
1997 
1998   if (!rank) {
1999     /* calculate the number of nonzeros on each processor */
2000     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2001     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2002     for (i=0; i<size; i++) {
2003       for (j=rowners[i]; j< rowners[i+1]; j++) {
2004         procsnz[i] += rowlengths[j];
2005       }
2006     }
2007     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2008 
2009     /* determine max buffer needed and allocate it */
2010     maxnz = 0;
2011     for (i=0; i<size; i++) {
2012       maxnz = PetscMax(maxnz,procsnz[i]);
2013     }
2014     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2015 
2016     /* read in my part of the matrix column indices  */
2017     nz     = procsnz[0];
2018     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
2019     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2020 
2021     /* read in everyone else's and ship them off */
2022     for (i=1; i<size; i++) {
2023       nz   = procsnz[i];
2024       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2025       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2026     }
2027     ierr = PetscFree(cols);CHKERRQ(ierr);
2028   } else {
2029     /* determine buffer space needed for message */
2030     nz = 0;
2031     for (i=0; i<m; i++) {
2032       nz += ourlens[i];
2033     }
2034     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
2035 
2036     /* receive message of column indices*/
2037     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2038     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2039     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2040   }
2041 
2042   /* determine column ownership if matrix is not square */
2043   if (N != M) {
2044     n      = N/size + ((N % size) > rank);
2045     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2046     cstart = cend - n;
2047   } else {
2048     cstart = rstart;
2049     cend   = rend;
2050     n      = cend - cstart;
2051   }
2052 
2053   /* loop over local rows, determining number of off diagonal entries */
2054   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2055   jj = 0;
2056   for (i=0; i<m; i++) {
2057     for (j=0; j<ourlens[i]; j++) {
2058       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2059       jj++;
2060     }
2061   }
2062 
2063   /* create our matrix */
2064   for (i=0; i<m; i++) {
2065     ourlens[i] -= offlens[i];
2066   }
2067   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2068   A = *newmat;
2069   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2070   for (i=0; i<m; i++) {
2071     ourlens[i] += offlens[i];
2072   }
2073 
2074   if (!rank) {
2075     vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);
2076 
2077     /* read in my part of the matrix numerical values  */
2078     nz   = procsnz[0];
2079     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2080 
2081     /* insert into matrix */
2082     jj      = rstart;
2083     smycols = mycols;
2084     svals   = vals;
2085     for (i=0; i<m; i++) {
2086       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2087       smycols += ourlens[i];
2088       svals   += ourlens[i];
2089       jj++;
2090     }
2091 
2092     /* read in other processors and ship out */
2093     for (i=1; i<size; i++) {
2094       nz   = procsnz[i];
2095       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2096       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2097     }
2098     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2099   } else {
2100     /* receive numeric values */
2101     vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);
2102 
2103     /* receive message of values*/
2104     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2105     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2106     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2107 
2108     /* insert into matrix */
2109     jj      = rstart;
2110     smycols = mycols;
2111     svals   = vals;
2112     for (i=0; i<m; i++) {
2113       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2114       smycols += ourlens[i];
2115       svals   += ourlens[i];
2116       jj++;
2117     }
2118   }
2119   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2120   ierr = PetscFree(vals);CHKERRQ(ierr);
2121   ierr = PetscFree(mycols);CHKERRQ(ierr);
2122   ierr = PetscFree(rowners);CHKERRQ(ierr);
2123 
2124   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2125   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 #undef __FUNC__
2130 #define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIAIJ"
2131 /*
2132     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
2133   copy on each process, and then the final result obtained by concatenating the local
2134   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2135 */
2136 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2137 {
2138   int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2139   int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
2140   Mat        *local,M,Mreuse;
2141   Scalar     *vwork,*aa;
2142   MPI_Comm   comm = mat->comm;
2143   Mat_SeqAIJ *aij;
2144 
2145 
2146   PetscFunctionBegin;
2147   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2148   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2149 
2150   if (call ==  MAT_REUSE_MATRIX) {
2151     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2152     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2153     local = &Mreuse;
2154     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2155   } else {
2156     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2157     Mreuse = *local;
2158     ierr = PetscFree(local);CHKERRQ(ierr);
2159   }
2160 
2161   /*
2162       m - number of local rows
2163       n - number of columns (same on all processors)
2164       rstart - first row in new global matrix generated
2165   */
2166   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2167   if (call == MAT_INITIAL_MATRIX) {
2168     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2169     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2170     ii  = aij->i;
2171     jj  = aij->j;
2172 
2173     /*
2174         Determine the number of non-zeros in the diagonal and off-diagonal
2175         portions of the matrix in order to do correct preallocation
2176     */
2177 
2178     /* first get start and end of "diagonal" columns */
2179     if (csize == PETSC_DECIDE) {
2180       nlocal = n/size + ((n % size) > rank);
2181     } else {
2182       nlocal = csize;
2183     }
2184     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2185     rstart = rend - nlocal;
2186     if (rank == size - 1 && rend != n) {
2187       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2188     }
2189 
2190     /* next, compute all the lengths */
2191     dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens);
2192     olens = dlens + m;
2193     for (i=0; i<m; i++) {
2194       jend = ii[i+1] - ii[i];
2195       olen = 0;
2196       dlen = 0;
2197       for (j=0; j<jend; j++) {
2198         if (*jj < rstart || *jj >= rend) olen++;
2199         else dlen++;
2200         jj++;
2201       }
2202       olens[i] = olen;
2203       dlens[i] = dlen;
2204     }
2205     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2206     ierr = PetscFree(dlens);CHKERRQ(ierr);
2207   } else {
2208     int ml,nl;
2209 
2210     M = *newmat;
2211     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2212     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2213     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2214     /*
2215          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2216        rather than the slower MatSetValues().
2217     */
2218     M->was_assembled = PETSC_TRUE;
2219     M->assembled     = PETSC_FALSE;
2220   }
2221   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2222   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2223   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2224   ii  = aij->i;
2225   jj  = aij->j;
2226   aa  = aij->a;
2227   for (i=0; i<m; i++) {
2228     row   = rstart + i;
2229     nz    = ii[i+1] - ii[i];
2230     cwork = jj;     jj += nz;
2231     vwork = aa;     aa += nz;
2232     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2233   }
2234 
2235   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2236   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2237   *newmat = M;
2238 
2239   /* save submatrix used in processor for next request */
2240   if (call ==  MAT_INITIAL_MATRIX) {
2241     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2242     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2243   }
2244 
2245   PetscFunctionReturn(0);
2246 }
2247