xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4ccb76dd2a06ade17c2800eec40bc3f6b9abd66f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.294 1999/05/12 03:29:11 bsmith Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /* local utility routine that creates a mapping from the global column
17 number to the local column number in the off-diagonal part of the local
18 storage of the matrix.  This is done in a non-scalable way since the
19 length of colmap equals the global number of columns.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for ( i=0; i<n; i++ ){
33     ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
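/*
   Illustrative sketch (not part of the original source): once colmap has
   been built as above, a global column number gcol (a hypothetical
   variable) is translated to a local column index in the off-diagonal
   matrix aij->B the same way MatSetValues_MPIAIJ() does below; a negative
   result means gcol has no slot in the off-diagonal part.

      int lcol;
#if defined (PETSC_USE_CTABLE)
      ierr = TableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
#else
      lcol = aij->colmap[gcol] - 1;
#endif
*/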
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col1) high = t; \
56       else              low  = t; \
57     } \
58       for ( _i=low; _i<nrow; _i++ ) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
78         new_j   = (int *) (new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
83         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         PetscFree(a->a);  \
94         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
95         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
96         a->singlemalloc = 1; \
97  \
98         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
99         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
100         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
101         a->maxnz += CHUNKSIZE; \
102         a->reallocs++; \
103       } \
104       N = nrow++ - 1; a->nz++; \
105       /* shift up all the later entries in this row */ \
106       for ( ii=N; ii>=_i; ii-- ) { \
107         rp[ii+1] = rp[ii]; \
108         ap[ii+1] = ap[ii]; \
109       } \
110       rp[_i] = col1;  \
111       ap[_i] = value;  \
112       a_noinsert: ; \
113       ailen[row] = nrow; \
114 }
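/*
   A worked sketch of the reallocation above: with shift == 0 and
   CHUNKSIZE == 15, suppose row r of an m-row matrix is full
   (nrow == rmax) when a new entry arrives.  Then

      new_nz   = ai[m] + 15
      new_i[k] = ai[k]        for k <= r
      new_i[k] = ai[k] + 15   for k >  r

   and new_j/new_a hold the old entries of rows 0..r, then 15 empty
   slots, then the old entries of rows r+1..m-1; so only row r gains
   free space while all later rows are copied 15 positions up.  The
   B macro below is identical except that it works on the off-diagonal
   matrix b.
*/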
115 
116 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
117 { \
118  \
119     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
120     rmax = bimax[row]; nrow = bilen[row];  \
121     col1 = col - shift; \
122      \
123     low = 0; high = nrow; \
124     while (high-low > 5) { \
125       t = (low+high)/2; \
126       if (rp[t] > col1) high = t; \
127       else              low  = t; \
128     } \
129       for ( _i=low; _i<nrow; _i++ ) { \
130         if (rp[_i] > col1) break; \
131         if (rp[_i] == col1) { \
132           if (addv == ADD_VALUES) ap[_i] += value;   \
133           else                  ap[_i] = value; \
134           goto b_noinsert; \
135         } \
136       }  \
137       if (nonew == 1) goto b_noinsert; \
138       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
139       if (nrow >= rmax) { \
140         /* there is no extra room in row, therefore enlarge */ \
141         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
142         Scalar *new_a; \
143  \
144         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
145  \
146         /* malloc new storage space */ \
147         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
148         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
149         new_j   = (int *) (new_a + new_nz); \
150         new_i   = new_j + new_nz; \
151  \
152         /* copy over old data into new slots */ \
153         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
154         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
156         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
157         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
158                                                            len*sizeof(int));CHKERRQ(ierr); \
159         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
160         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
161                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
162         /* free up old matrix storage */ \
163  \
164         PetscFree(b->a);  \
165         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
166         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
167         b->singlemalloc = 1; \
168  \
169         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
170         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
171         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
172         b->maxnz += CHUNKSIZE; \
173         b->reallocs++; \
174       } \
175       N = nrow++ - 1; b->nz++; \
176       /* shift up all the later entries in this row */ \
177       for ( ii=N; ii>=_i; ii-- ) { \
178         rp[ii+1] = rp[ii]; \
179         ap[ii+1] = ap[ii]; \
180       } \
181       rp[_i] = col1;  \
182       ap[_i] = value;  \
183       b_noinsert: ; \
184       bilen[row] = nrow; \
185 }
186 
187 #undef __FUNC__
188 #define __FUNC__ "MatSetValues_MPIAIJ"
189 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
190 {
191   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
192   Scalar     value;
193   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
194   int        cstart = aij->cstart, cend = aij->cend,row,col;
195   int        roworiented = aij->roworiented;
196 
197   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
198   Mat        A = aij->A;
199   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
200   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
201   Scalar     *aa = a->a;
202 
203   Mat        B = aij->B;
204   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
205   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
206   Scalar     *ba = b->a;
207 
208   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
209   int        nonew = a->nonew,shift = a->indexshift;
210   Scalar     *ap;
211 
212   PetscFunctionBegin;
213   for ( i=0; i<m; i++ ) {
214     if (im[i] < 0) continue;
215 #if defined(PETSC_USE_BOPT_g)
216     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
217 #endif
218     if (im[i] >= rstart && im[i] < rend) {
219       row = im[i] - rstart;
220       for ( j=0; j<n; j++ ) {
221         if (in[j] >= cstart && in[j] < cend){
222           col = in[j] - cstart;
223           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
224           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
225           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
226         }
227         else if (in[j] < 0) continue;
228 #if defined(PETSC_USE_BOPT_g)
229         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
230 #endif
231         else {
232           if (mat->was_assembled) {
233             if (!aij->colmap) {
234               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
235             }
236 #if defined (PETSC_USE_CTABLE)
237             ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
238 	    col--;
239 #else
240             col = aij->colmap[in[j]] - 1;
241 #endif
242             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
243               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
244               col =  in[j];
245               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
246               B = aij->B;
247               b = (Mat_SeqAIJ *) B->data;
248               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
249               ba = b->a;
250             }
251           } else col = in[j];
252           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
253           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
254           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
255         }
256       }
257     } else {
258       if (!aij->donotstash) {
259         if (roworiented) {
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
263         }
264       }
265     }
266   }
267   PetscFunctionReturn(0);
268 }
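/*
   Usage sketch (hypothetical driver code, error checking abbreviated):
   each process may set any entry; entries in rows owned by another
   process are stashed above and communicated during assembly.

      Mat    A;
      int    i, rstart, rend, ierr;
      Scalar one = 1.0;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                             5,PETSC_NULL,5,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/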
269 
270 #undef __FUNC__
271 #define __FUNC__ "MatGetValues_MPIAIJ"
272 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
275   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
276   int        cstart = aij->cstart, cend = aij->cend,row,col;
277 
278   PetscFunctionBegin;
279   for ( i=0; i<m; i++ ) {
280     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
281     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
282     if (idxm[i] >= rstart && idxm[i] < rend) {
283       row = idxm[i] - rstart;
284       for ( j=0; j<n; j++ ) {
285         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
286         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
287         if (idxn[j] >= cstart && idxn[j] < cend){
288           col = idxn[j] - cstart;
289           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
290         } else {
291           if (!aij->colmap) {
292             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
293           }
294 #if defined (PETSC_USE_CTABLE)
295           ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
296           col --;
297 #else
298           col = aij->colmap[idxn[j]] - 1;
299 #endif
300           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
301           else {
302             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
303           }
304         }
305       }
306     } else {
307       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
308     }
309   }
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNC__
314 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
316 {
317   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
318   int         ierr,nstash,reallocs;
319   InsertMode  addv;
320 
321   PetscFunctionBegin;
322   if (aij->donotstash) {
323     PetscFunctionReturn(0);
324   }
325 
326   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
327   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
328   if (addv == (ADD_VALUES|INSERT_VALUES)) {
329     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
330   }
331   mat->insertmode = addv; /* in case this processor had no cache */
332 
333   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
334   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
335   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
336   PetscFunctionReturn(0);
337 }
338 
339 
340 #undef __FUNC__
341 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
343 {
344   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
345   int         i,j,rstart,ncols,n,ierr,flg;
346   int         *row,*col,other_disassembled;
347   Scalar      *val;
348   InsertMode  addv = mat->insertmode;
349 
350   PetscFunctionBegin;
351   if (!aij->donotstash) {
352     while (1) {
353       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
354       if (!flg) break;
355 
356       for ( i=0; i<n; ) {
357         /* Now identify the consecutive vals belonging to the same row */
358         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
359         if (j < n) ncols = j-i;
360         else       ncols = n-i;
361         /* Now assemble all these values with a single function call */
362         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
363         i = j;
364       }
365     }
366     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
367   }
368 
369   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
370   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
371 
372   /* determine if any processor has disassembled; if so, we must
373      also disassemble ourselves, so that we may reassemble. */
374   /*
375      if the nonzero structure of submatrix B cannot change then we know
376      that no processor disassembled and we can skip this step
377   */
378   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
379     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
380     if (mat->was_assembled && !other_disassembled) {
381       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
382     }
383   }
384 
385   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
386     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
387   }
388   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
389   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
390 
391   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
392   PetscFunctionReturn(0);
393 }
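/*
   Sketch of the stash processing above: a received message is a list of
   (row,col,val) triples in which entries of the same row are contiguous,
   e.g.

      row: 3 3 3 7 7     col: 0 4 9 1 2

   The inner loop finds each run of equal rows (here of lengths 3 and 2)
   so that MatSetValues_MPIAIJ() is called once per run rather than once
   per entry.
*/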
394 
395 #undef __FUNC__
396 #define __FUNC__ "MatZeroEntries_MPIAIJ"
397 int MatZeroEntries_MPIAIJ(Mat A)
398 {
399   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
400   int        ierr;
401 
402   PetscFunctionBegin;
403   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
404   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNC__
409 #define __FUNC__ "MatZeroRows_MPIAIJ"
410 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
411 {
412   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
413   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
414   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
415   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
416   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
417   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
418   MPI_Comm       comm = A->comm;
419   MPI_Request    *send_waits,*recv_waits;
420   MPI_Status     recv_status,*send_status;
421   IS             istmp;
422 
423   PetscFunctionBegin;
424   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
425   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
426 
427   /*  first count number of contributors to each processor */
428   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
429   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
430   procs  = nprocs + size;
431   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
432   for ( i=0; i<N; i++ ) {
433     idx = rows[i];
434     found = 0;
435     for ( j=0; j<size; j++ ) {
436       if (idx >= owners[j] && idx < owners[j+1]) {
437         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
438       }
439     }
440     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
441   }
442   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
443 
444   /* inform other processors of number of messages and max length*/
445   work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
446   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
447   nrecvs = work[rank];
448   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
449   nmax = work[rank];
450   PetscFree(work);
451 
452   /* post receives:   */
453   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
454   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
455   for ( i=0; i<nrecvs; i++ ) {
456     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
457   }
458 
459   /* do sends:
460       1) starts[i] gives the starting index in svalues for stuff going to
461          the ith processor
462   */
463   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
464   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
465   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
466   starts[0] = 0;
467   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
468   for ( i=0; i<N; i++ ) {
469     svalues[starts[owner[i]]++] = rows[i];
470   }
471   ISRestoreIndices(is,&rows);
472 
473   starts[0] = 0;
474   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
475   count = 0;
476   for ( i=0; i<size; i++ ) {
477     if (procs[i]) {
478       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
479     }
480   }
481   PetscFree(starts);
482 
483   base = owners[rank];
484 
485   /*  wait on receives */
486   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
487   source = lens + nrecvs;
488   count  = nrecvs; slen = 0;
489   while (count) {
490     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
491     /* unpack receives into our local space */
492     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
493     source[imdex]  = recv_status.MPI_SOURCE;
494     lens[imdex]  = n;
495     slen += n;
496     count--;
497   }
498   PetscFree(recv_waits);
499 
500   /* move the data into the send scatter */
501   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
502   count = 0;
503   for ( i=0; i<nrecvs; i++ ) {
504     values = rvalues + i*nmax;
505     for ( j=0; j<lens[i]; j++ ) {
506       lrows[count++] = values[j] - base;
507     }
508   }
509   PetscFree(rvalues); PetscFree(lens);
510   PetscFree(owner); PetscFree(nprocs);
511 
512   /* actually zap the local rows */
513   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
514   PLogObjectParent(A,istmp);
515 
516   /*
517         Zero the required rows. If the "diagonal block" of the matrix
518      is square and the user wishes to set the diagonal we use separate
519      code so that MatSetValues() is not called for each diagonal entry,
520      which would allocate new memory and slow things down with many mallocs.
521 
522        Contributed by: Matthew Knepley
523   */
524   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
525   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
526   if (diag && (l->A->M == l->A->N)) {
527     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
528   } else if (diag) {
529     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
530     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
531       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
532 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
533     }
534     for ( i = 0; i < slen; i++ ) {
535       row  = lrows[i] + rstart;
536       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
537     }
538     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
539     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
540   } else {
541     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
542   }
543   ierr = ISDestroy(istmp);CHKERRQ(ierr);
544   PetscFree(lrows);
545 
546   /* wait on sends */
547   if (nsends) {
548     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
549     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
550     PetscFree(send_status);
551   }
552   PetscFree(send_waits); PetscFree(svalues);
553 
554   PetscFunctionReturn(0);
555 }
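/*
   Usage sketch (hypothetical, error checking abbreviated): given an
   assembled MPIAIJ matrix A, zero two global rows and place 1.0 on
   their diagonals.  Any process may list any global row; the message
   exchange above routes each row to its owner.

      IS     is;
      int    rows[2] = {0,5}, ierr;
      Scalar diag = 1.0;

      ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/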
556 
557 #undef __FUNC__
558 #define __FUNC__ "MatMult_MPIAIJ"
559 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
560 {
561   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
562   int        ierr,nt;
563 
564   PetscFunctionBegin;
565   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
566   if (nt != a->n) {
567     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
568   }
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
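/*
   The multiply above computes y = A_d x_local + A_o x_ghost, where A_d
   (a->A) holds the diagonal block and A_o (a->B) the off-diagonal block;
   the scatter filling the ghost values a->lvec is started before the
   local product and finished after it, so the communication is
   overlapped with the computation on the diagonal block.
*/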
575 
576 #undef __FUNC__
577 #define __FUNC__ "MatMultAdd_MPIAIJ"
578 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
579 {
580   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
581   int        ierr;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
586   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNC__
592 #define __FUNC__ "MatMultTrans_MPIAIJ"
593 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
594 {
595   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
596   int        ierr;
597 
598   PetscFunctionBegin;
599   /* do nondiagonal part */
600   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
601   /* send it on its way */
602   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   /* do local part */
604   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
605   /* receive remote parts: note this assumes the values are not actually */
606   /* inserted in yy until VecScatterEnd() below, which is true for this */
607   /* implementation but may not always be true */
608   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNC__
613 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
614 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
615 {
616   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
617   int        ierr;
618 
619   PetscFunctionBegin;
620   /* do nondiagonal part */
621   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
622   /* send it on its way */
623   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   /* do local part */
625   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
626   /* receive remote parts: note this assumes the values are not actually */
627   /* inserted in zz until VecScatterEnd() below, which is true for this */
628   /* implementation but may not always be true */
629   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 /*
634   This only works correctly for square matrices where the subblock A->A is the
635    diagonal block
636 */
637 #undef __FUNC__
638 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
639 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
640 {
641   int        ierr;
642   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
643 
644   PetscFunctionBegin;
645   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
646   if (a->rstart != a->cstart || a->rend != a->cend) {
647     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
648   }
649   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNC__
654 #define __FUNC__ "MatScale_MPIAIJ"
655 int MatScale_MPIAIJ(Scalar *aa,Mat A)
656 {
657   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
658   int        ierr;
659 
660   PetscFunctionBegin;
661   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
662   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNC__
667 #define __FUNC__ "MatDestroy_MPIAIJ"
668 int MatDestroy_MPIAIJ(Mat mat)
669 {
670   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
671   int        ierr;
672 
673   PetscFunctionBegin;
674   if (--mat->refct > 0) PetscFunctionReturn(0);
675 
676   if (mat->mapping) {
677     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
678   }
679   if (mat->bmapping) {
680     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
681   }
682   if (mat->rmap) {
683     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
684   }
685   if (mat->cmap) {
686     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
687   }
688 #if defined(PETSC_USE_LOG)
689   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   PetscFree(aij->rowners);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) TableDelete(aij->colmap);
697 #else
698   if (aij->colmap) PetscFree(aij->colmap);
699 #endif
700   if (aij->garray) PetscFree(aij->garray);
701   if (aij->lvec)   VecDestroy(aij->lvec);
702   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
703   if (aij->rowvalues) PetscFree(aij->rowvalues);
704   PetscFree(aij);
705   PLogObjectDestroy(mat);
706   PetscHeaderDestroy(mat);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNC__
711 #define __FUNC__ "MatView_MPIAIJ_Binary"
712 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
713 {
714   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
715   int         ierr;
716 
717   PetscFunctionBegin;
718   if (aij->size == 1) {
719     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
720   }
721   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNC__
726 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
727 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
728 {
729   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
730   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
731   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
732   int         size = aij->size;
733   FILE        *fd;
734   ViewerType  vtype;
735 
736   PetscFunctionBegin;
737   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
738   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
739     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
740     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
741       MatInfo info;
742       int     flg;
743       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
744       ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
745       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
746       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
747       PetscSequentialPhaseBegin(mat->comm,1);
748       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
749          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
750       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
751          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
752       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
753       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
754       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
755       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
756       fflush(fd);
757       PetscSequentialPhaseEnd(mat->comm,1);
758       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
759       PetscFunctionReturn(0);
760     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
761       PetscFunctionReturn(0);
762     }
763   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
764     Draw       draw;
765     PetscTruth isnull;
766     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
767     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
768   }
769 
770   if (size == 1) {
771     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
772   } else {
773     /* assemble the entire matrix onto first processor. */
774     Mat         A;
775     Mat_SeqAIJ *Aloc;
776     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
777     Scalar      *a;
778 
779     if (!rank) {
780       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
781     } else {
782       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
783     }
784     PLogObjectParent(mat,A);
785 
786     /* copy over the A part */
787     Aloc = (Mat_SeqAIJ*) aij->A->data;
788     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
789     row = aij->rstart;
790     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
791     for ( i=0; i<m; i++ ) {
792       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
793       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
794     }
795     aj = Aloc->j;
796     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
797 
798     /* copy over the B part */
799     Aloc = (Mat_SeqAIJ*) aij->B->data;
800     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
801     row = aij->rstart;
802     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
803     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
804     for ( i=0; i<m; i++ ) {
805       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
806       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
807     }
808     PetscFree(ct);
809     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
810     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
811     /*
812        Everyone has to call to draw the matrix since the graphics waits are
813        synchronized across all processors that share the Draw object
814     */
815     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
816       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
817     }
818     ierr = MatDestroy(A);CHKERRQ(ierr);
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNC__
824 #define __FUNC__ "MatView_MPIAIJ"
825 int MatView_MPIAIJ(Mat mat,Viewer viewer)
826 {
827   int         ierr;
828   ViewerType  vtype;
829 
830   PetscFunctionBegin;
831   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
832   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
833       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
834     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
835   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
836     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
837   } else {
838     SETERRQ(1,1,"Viewer type not supported by PETSc object");
839   }
840   PetscFunctionReturn(0);
841 }
842 
843 /*
844     This has to provide several versions.
845 
846      2) a) use only local smoothing, updating outer values only once.
847         b) local smoothing, updating outer values each inner iteration.
848      3) coloring, updating outer values between colors.
849 */
850 #undef __FUNC__
851 #define __FUNC__ "MatRelax_MPIAIJ"
852 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
853                            double fshift,int its,Vec xx)
854 {
855   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
856   Mat        AA = mat->A, BB = mat->B;
857   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
858   Scalar     *b,*x,*xs,*ls,d,*v,sum;
859   int        ierr,*idx, *diag;
860   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
861 
862   PetscFunctionBegin;
863   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
864   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
865   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
866   xs = x + shift; /* shift by one for index start of 1 */
867   ls = ls + shift;
868   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
869   diag = A->diag;
870   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
871     if (flag & SOR_ZERO_INITIAL_GUESS) {
872       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
873       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
874       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
875       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
876       PetscFunctionReturn(0);
877     }
878     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
879     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
880     while (its--) {
881       /* go down through the rows */
882       for ( i=0; i<m; i++ ) {
883         n    = A->i[i+1] - A->i[i];
884 	PLogFlops(4*n+3);
885         idx  = A->j + A->i[i] + shift;
886         v    = A->a + A->i[i] + shift;
887         sum  = b[i];
888         SPARSEDENSEMDOT(sum,xs,v,idx,n);
889         d    = fshift + A->a[diag[i]+shift];
890         n    = B->i[i+1] - B->i[i];
891         idx  = B->j + B->i[i] + shift;
892         v    = B->a + B->i[i] + shift;
893         SPARSEDENSEMDOT(sum,ls,v,idx,n);
894         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
895       }
896       /* come up through the rows */
897       for ( i=m-1; i>-1; i-- ) {
898         n    = A->i[i+1] - A->i[i];
899 	PLogFlops(4*n+3);
900         idx  = A->j + A->i[i] + shift;
901         v    = A->a + A->i[i] + shift;
902         sum  = b[i];
903         SPARSEDENSEMDOT(sum,xs,v,idx,n);
904         d    = fshift + A->a[diag[i]+shift];
905         n    = B->i[i+1] - B->i[i];
906         idx  = B->j + B->i[i] + shift;
907         v    = B->a + B->i[i] + shift;
908         SPARSEDENSEMDOT(sum,ls,v,idx,n);
909         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
910       }
911     }
912   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
913     if (flag & SOR_ZERO_INITIAL_GUESS) {
914       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
915       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
916       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
917       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
918       PetscFunctionReturn(0);
919     }
920     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
921     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
922     while (its--) {
923       for ( i=0; i<m; i++ ) {
924         n    = A->i[i+1] - A->i[i];
925 	PLogFlops(4*n+3);
926         idx  = A->j + A->i[i] + shift;
927         v    = A->a + A->i[i] + shift;
928         sum  = b[i];
929         SPARSEDENSEMDOT(sum,xs,v,idx,n);
930         d    = fshift + A->a[diag[i]+shift];
931         n    = B->i[i+1] - B->i[i];
932         idx  = B->j + B->i[i] + shift;
933         v    = B->a + B->i[i] + shift;
934         SPARSEDENSEMDOT(sum,ls,v,idx,n);
935         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
936       }
937     }
938   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
939     if (flag & SOR_ZERO_INITIAL_GUESS) {
940       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
941       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
942       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
943       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
944       PetscFunctionReturn(0);
945     }
946     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
947     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
948     while (its--) {
949       for ( i=m-1; i>-1; i-- ) {
950         n    = A->i[i+1] - A->i[i];
951 	PLogFlops(4*n+3);
952         idx  = A->j + A->i[i] + shift;
953         v    = A->a + A->i[i] + shift;
954         sum  = b[i];
955         SPARSEDENSEMDOT(sum,xs,v,idx,n);
956         d    = fshift + A->a[diag[i]+shift];
957         n    = B->i[i+1] - B->i[i];
958         idx  = B->j + B->i[i] + shift;
959         v    = B->a + B->i[i] + shift;
960         SPARSEDENSEMDOT(sum,ls,v,idx,n);
961         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
962       }
963     }
964   } else {
965     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
966   }
967   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
968   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
969   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
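/*
   The sweeps above implement the (local) SOR update

      x_i <- (1-omega) x_i + (omega/d_i) ( b_i - sum_{j != i} a_ij x_j ),
      d_i  = fshift + a_ii,

   where the off-diagonal block B contributes the terms involving the
   ghost values ls; with the SOR_LOCAL_* flags those ghost values are
   refreshed once before the sweeps, not between inner iterations.
*/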
972 
973 #undef __FUNC__
974 #define __FUNC__ "MatGetInfo_MPIAIJ"
975 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
976 {
977   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
978   Mat        A = mat->A, B = mat->B;
979   int        ierr;
980   double     isend[5], irecv[5];
981 
982   PetscFunctionBegin;
983   info->block_size     = 1.0;
984   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
985   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
986   isend[3] = info->memory;  isend[4] = info->mallocs;
987   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
988   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
989   isend[3] += info->memory;  isend[4] += info->mallocs;
990   if (flag == MAT_LOCAL) {
991     info->nz_used      = isend[0];
992     info->nz_allocated = isend[1];
993     info->nz_unneeded  = isend[2];
994     info->memory       = isend[3];
995     info->mallocs      = isend[4];
996   } else if (flag == MAT_GLOBAL_MAX) {
997     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
998     info->nz_used      = irecv[0];
999     info->nz_allocated = irecv[1];
1000     info->nz_unneeded  = irecv[2];
1001     info->memory       = irecv[3];
1002     info->mallocs      = irecv[4];
1003   } else if (flag == MAT_GLOBAL_SUM) {
1004     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1005     info->nz_used      = irecv[0];
1006     info->nz_allocated = irecv[1];
1007     info->nz_unneeded  = irecv[2];
1008     info->memory       = irecv[3];
1009     info->mallocs      = irecv[4];
1010   }
1011   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1012   info->fill_ratio_needed = 0;
1013   info->factor_mallocs    = 0;
1014   info->rows_global       = (double)mat->M;
1015   info->columns_global    = (double)mat->N;
1016   info->rows_local        = (double)mat->m;
1017   info->columns_local     = (double)mat->n;
1018 
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNC__
1023 #define __FUNC__ "MatSetOption_MPIAIJ"
1024 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1025 {
1026   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1027   int        ierr;
1028 
1029   PetscFunctionBegin;
1030   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1031       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1032       op == MAT_COLUMNS_UNSORTED ||
1033       op == MAT_COLUMNS_SORTED ||
1034       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1035       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1036         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1037         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1038   } else if (op == MAT_ROW_ORIENTED) {
1039     a->roworiented = 1;
1040     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1041     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1042   } else if (op == MAT_ROWS_SORTED ||
1043              op == MAT_ROWS_UNSORTED ||
1044              op == MAT_SYMMETRIC ||
1045              op == MAT_STRUCTURALLY_SYMMETRIC ||
1046              op == MAT_YES_NEW_DIAGONALS)
1047     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1048   else if (op == MAT_COLUMN_ORIENTED) {
1049     a->roworiented = 0;
1050     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1051     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1052   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1053     a->donotstash = 1;
1054   } else if (op == MAT_NO_NEW_DIAGONALS){
1055     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1056   } else {
1057     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1058   }
1059   PetscFunctionReturn(0);
1060 }
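/*
   Usage note (a sketch): options that make sense per-block are forwarded
   to both the diagonal and off-diagonal submatrices, e.g.

      ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);

   while MAT_IGNORE_OFF_PROC_ENTRIES only sets the local donotstash flag,
   so subsequent MatSetValues() calls drop off-process entries instead of
   stashing them.
*/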
1061 
1062 #undef __FUNC__
1063 #define __FUNC__ "MatGetSize_MPIAIJ"
1064 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1065 {
1066   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1067 
1068   PetscFunctionBegin;
1069   if (m) *m = mat->M;
1070   if (n) *n = mat->N;
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNC__
1075 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1076 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1077 {
1078   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1079 
1080   PetscFunctionBegin;
1081   if (m) *m = mat->m;
1082   if (n) *n = mat->n;
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNC__
1087 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1088 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1089 {
1090   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1091 
1092   PetscFunctionBegin;
1093   *m = mat->rstart; *n = mat->rend;
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNC__
1098 #define __FUNC__ "MatGetRow_MPIAIJ"
1099 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1100 {
1101   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1102   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1103   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1104   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1105   int        *cmap, *idx_p;
1106 
1107   PetscFunctionBegin;
1108   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1109   mat->getrowactive = PETSC_TRUE;
1110 
1111   if (!mat->rowvalues && (idx || v)) {
1112     /*
1113         allocate enough space to hold information from the longest row.
1114     */
1115     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1116     int     max = 1,m = mat->m,tmp;
1117     for ( i=0; i<m; i++ ) {
1118       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1119       if (max < tmp) { max = tmp; }
1120     }
1121     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1122     mat->rowindices = (int *) (mat->rowvalues + max);
1123   }
1124 
1125   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1126   lrow = row - rstart;
1127 
1128   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1129   if (!v)   {pvA = 0; pvB = 0;}
1130   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1131   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1132   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1133   nztot = nzA + nzB;
1134 
1135   cmap  = mat->garray;
1136   if (v  || idx) {
1137     if (nztot) {
1138       /* Sort by increasing column numbers, assuming A and B already sorted */
1139       int imark = -1;
1140       if (v) {
1141         *v = v_p = mat->rowvalues;
1142         for ( i=0; i<nzB; i++ ) {
1143           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1144           else break;
1145         }
1146         imark = i;
1147         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1148         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1149       }
1150       if (idx) {
1151         *idx = idx_p = mat->rowindices;
1152         if (imark > -1) {
1153           for ( i=0; i<imark; i++ ) {
1154             idx_p[i] = cmap[cworkB[i]];
1155           }
1156         } else {
1157           for ( i=0; i<nzB; i++ ) {
1158             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1159             else break;
1160           }
1161           imark = i;
1162         }
1163         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1164         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1165       }
1166     } else {
1167       if (idx) *idx = 0;
1168       if (v)   *v   = 0;
1169     }
1170   }
1171   *nz = nztot;
1172   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1173   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
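/*
   Usage sketch (hypothetical, error checking abbreviated; assumes an
   assembled MPIAIJ matrix A): rows may only be requested locally, and
   each MatGetRow() must be matched by MatRestoreRow() before the next
   row is requested.

      int    row, nz, *cols, ierr, rstart, rend;
      Scalar *vals;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
        ... use the nz entries in cols[] and vals[] of this global row ...
        ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
      }
*/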
1176 
1177 #undef __FUNC__
1178 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1179 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1180 {
1181   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1182 
1183   PetscFunctionBegin;
1184   if (aij->getrowactive == PETSC_FALSE) {
1185     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1186   }
1187   aij->getrowactive = PETSC_FALSE;
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNC__
1192 #define __FUNC__ "MatNorm_MPIAIJ"
1193 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1194 {
1195   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1196   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1197   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1198   double     sum = 0.0;
1199   Scalar     *v;
1200 
1201   PetscFunctionBegin;
1202   if (aij->size == 1) {
1203     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1204   } else {
1205     if (type == NORM_FROBENIUS) {
1206       v = amat->a;
1207       for (i=0; i<amat->nz; i++ ) {
1208 #if defined(PETSC_USE_COMPLEX)
1209         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1210 #else
1211         sum += (*v)*(*v); v++;
1212 #endif
1213       }
1214       v = bmat->a;
1215       for (i=0; i<bmat->nz; i++ ) {
1216 #if defined(PETSC_USE_COMPLEX)
1217         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1218 #else
1219         sum += (*v)*(*v); v++;
1220 #endif
1221       }
1222       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1223       *norm = sqrt(*norm);
1224     } else if (type == NORM_1) { /* max column norm */
1225       double *tmp, *tmp2;
1226       int    *jj, *garray = aij->garray;
1227       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1228       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1229       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1230       *norm = 0.0;
1231       v = amat->a; jj = amat->j;
1232       for ( j=0; j<amat->nz; j++ ) {
1233         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1234       }
1235       v = bmat->a; jj = bmat->j;
1236       for ( j=0; j<bmat->nz; j++ ) {
1237         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1238       }
1239       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1240       for ( j=0; j<aij->N; j++ ) {
1241         if (tmp2[j] > *norm) *norm = tmp2[j];
1242       }
1243       PetscFree(tmp); PetscFree(tmp2);
1244     } else if (type == NORM_INFINITY) { /* max row norm */
1245       double ntemp = 0.0;
1246       for ( j=0; j<amat->m; j++ ) {
1247         v = amat->a + amat->i[j] + shift;
1248         sum = 0.0;
1249         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1250           sum += PetscAbsScalar(*v); v++;
1251         }
1252         v = bmat->a + bmat->i[j] + shift;
1253         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1254           sum += PetscAbsScalar(*v); v++;
1255         }
1256         if (sum > ntemp) ntemp = sum;
1257       }
1258       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1259     } else {
1260       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1261     }
1262   }
1263   PetscFunctionReturn(0);
1264 }
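/*
   Reference formulas for the cases above (a sketch):

      NORM_FROBENIUS: sqrt( sum_ij |a_ij|^2 ), summed across processes
      NORM_1:         max_j sum_i |a_ij|       (max column sum)
      NORM_INFINITY:  max_i sum_j |a_ij|       (max row sum)
*/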
1265 
1266 #undef __FUNC__
1267 #define __FUNC__ "MatTranspose_MPIAIJ"
1268 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1269 {
1270   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1271   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1272   int        ierr,shift = Aloc->indexshift;
1273   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1274   Mat        B;
1275   Scalar     *array;
1276 
1277   PetscFunctionBegin;
1278   if (matout == PETSC_NULL && M != N) {
1279     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1280   }
1281 
1282   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1283 
1284   /* copy over the A part */
1285   Aloc = (Mat_SeqAIJ*) a->A->data;
1286   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1287   row = a->rstart;
1288   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1289   for ( i=0; i<m; i++ ) {
1290     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1291     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1292   }
1293   aj = Aloc->j;
1294   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1295 
1296   /* copy over the B part */
1297   Aloc = (Mat_SeqAIJ*) a->B->data;
1298   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1299   row = a->rstart;
1300   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1301   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1302   for ( i=0; i<m; i++ ) {
1303     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1304     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1305   }
1306   PetscFree(ct);
1307   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1308   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1309   if (matout != PETSC_NULL) {
1310     *matout = B;
1311   } else {
1312     PetscOps       *Abops;
1313     struct _MatOps *Aops;
1314 
1315     /* This isn't really an in-place transpose, but we do free the data structures of a */
1316     PetscFree(a->rowners);
1317     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1318     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1319 #if defined (PETSC_USE_CTABLE)
1320     if (a->colmap) TableDelete(a->colmap);
1321 #else
1322     if (a->colmap) PetscFree(a->colmap);
1323 #endif
1324     if (a->garray) PetscFree(a->garray);
1325     if (a->lvec) VecDestroy(a->lvec);
1326     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1327     PetscFree(a);
1328 
1329     /*
1330        This is horrible, horrible code. We need to keep the
1331       A pointers for the bops and ops but copy everything
1332       else from B.
1333     */
1334     Abops   = A->bops;
1335     Aops    = A->ops;
1336     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1337     A->bops = Abops;
1338     A->ops  = Aops;
1339     PetscHeaderDestroy(B);
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNC__
1345 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1346 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1347 {
1348   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1349   Mat a = aij->A, b = aij->B;
1350   int ierr,s1,s2,s3;
1351 
1352   PetscFunctionBegin;
1353   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1354   if (rr) {
1355     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1356     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1357     /* Overlap communication with computation. */
1358     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1359   }
1360   if (ll) {
1361     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1362     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1363     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1364   }
1365   /* scale  the diagonal block */
1366   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1367 
1368   if (rr) {
1369     /* Do a scatter end and then right scale the off-diagonal block */
1370     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1371     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1372   }
1373 
1374   PetscFunctionReturn(0);
1375 }
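/*
   The scaling above forms diag(ll) * A * diag(rr).  The left scaling of
   the off-diagonal block needs only local values of ll, while the right
   scaling needs the ghost entries of rr, so the ghost scatter is started
   first and finished only after the diagonal block has been scaled.
*/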
1376 
1377 
1378 #undef __FUNC__
1379 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1380 int MatPrintHelp_MPIAIJ(Mat A)
1381 {
1382   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1383   int        ierr;
1384 
1385   PetscFunctionBegin;
1386   if (!a->rank) {
1387     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNC__
1393 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1394 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1395 {
1396   PetscFunctionBegin;
1397   *bs = 1;
1398   PetscFunctionReturn(0);
1399 }
1400 #undef __FUNC__
1401 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1402 int MatSetUnfactored_MPIAIJ(Mat A)
1403 {
1404   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1405   int        ierr;
1406 
1407   PetscFunctionBegin;
1408   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNC__
1413 #define __FUNC__ "MatEqual_MPIAIJ"
1414 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1415 {
1416   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1417   Mat        a, b, c, d;
1418   PetscTruth flg;
1419   int        ierr;
1420 
1421   PetscFunctionBegin;
1422   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1423   a = matA->A; b = matA->B;
1424   c = matB->A; d = matB->B;
1425 
1426   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1427   if (flg == PETSC_TRUE) {
1428     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1429   }
1430   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
#undef __FUNC__
#define __FUNC__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a copy more
       efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};

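/*
    The table above supplies the MATMPIAIJ implementations of the generic Mat
  operations (a zero entry means the operation is not supported by this type).
  The type-independent interface routines simply dispatch through it; for
  example, MatMult() essentially reduces to

      ierr = (*mat->ops->mult)(mat,x,y);CHKERRQ(ierr);
*/
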
/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
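
/*
    Sketch of the intended use of the two routines above (through the generic
  MatStoreValues()/MatRetrieveValues() interface), assuming the nonzero
  pattern of the matrix is frozen first: the numerical values are saved
  before being overwritten (for example by an in-place factorization) and
  restored afterwards.

      ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);
      ... overwrite the values of mat ...
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/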

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (the same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (the same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning
   across processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the
   approximate storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on the
   respective processes.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (m x (N-m)) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using the SeqAIJ format on one processor)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
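
   As a sketch (error checking omitted), the matrix above would then be
   created on proc0 with the perfect preallocation values listed above; the
   other processes make the same call with their own m,n and d_nnz,o_nnz
   values:

.vb
     Mat A;
     int m = 3, n = 3;
     int d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve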

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr,i,size,flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];
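  /* after the gathers and running sums above, rows rowners[i] through rowners[i+1]-1
     of the global matrix are owned by process i (and similarly cowners[] for columns) */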

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i, nz, ierr, j,rstart, rend, fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not a matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];
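  /* the formula above gives the first M % size processes one extra row each;
     e.g. M = 10 on 3 processes yields local sizes 4,3,3 */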

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }
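  /* in the non-square branch above, MPI_Scan returns the running sum of the local
     column counts over ranks 0..rank, which is exactly this process' cend */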

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix, preallocating with the diagonal lengths (ourlens-offlens)
     and the off-diagonal lengths (offlens) */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
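  /* restore the full row lengths; they are used below to walk through
     the column index and value arrays row by row */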
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ copy
  on each process, and then the final parallel result formed by concatenating
  the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }
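    /* csize is the number of columns of the new matrix owned by this process;
       PETSC_DECIDE distributes the n columns in the same balanced fashion as the rows */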

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save the local submatrix on this process for the next reuse request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2206