xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 63bb2aa60f4717cbfb86ebed81540aa885ea575c)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it it is not scalable (each processor
19 has an order N integer array but is fast to acess.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv) - textually inlined
   insertion of a single (row,col,value) into the "diagonal" sequential block
   whose CSR arrays are a/ai/aj/aa (see MatSetValues_MPIAIJ, which declares
   the working variables rp,ap,rmax,nrow,col1,low,high,t,_i,ii,N,nonew,shift,
   ierr that this macro uses).  If the row is full the row storage is grown by
   CHUNKSIZE entries via a fresh single allocation.

   FIX: the bisection below must compare rp[] against the shifted column
   index col1 (= col - shift), the same quantity used by the linear scan and
   the stored column values; comparing against the unshifted col produced a
   wrong search window when the matrix uses Fortran-style indexing (shift == -1).
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: values, column indices and row offsets in one block */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows after `row' shift by CHUNKSIZE */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
           ierr = PetscFree(a->i);CHKERRQ(ierr); \
           ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow; \
}
117 
/*
   MatSetValues_SeqAIJ_B_Private(row,col,value,addv) - same inlined insertion
   as MatSetValues_SeqAIJ_A_Private but for the "off-diagonal" sequential
   block whose CSR arrays are b/bi/bj/ba.

   FIX: as in the A variant, the bisection must compare rp[] against the
   shifted column index col1 (= col - shift), matching the linear scan below;
   comparing against the unshifted col gave a wrong search window when the
   matrix uses Fortran-style indexing (shift == -1).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
       for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: values, column indices and row offsets in one block */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows after `row' shift by CHUNKSIZE */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow; \
}
191 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - inserts or adds a logically dense block of values
   into the parallel matrix.  Entries whose row is owned locally go straight
   into the diagonal (aij->A) or off-diagonal (aij->B) sequential block via
   the inlined macros above; rows owned by other processors are stashed for
   communication at MatAssemblyBegin/End time (unless aij->donotstash).

   NOTE(review): nonew and shift are read from the A block only and reused
   for the B macro; this assumes both blocks were created with the same
   index shift and "no new nonzero" options -- confirm.
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Working variables required by the MatSetValues_SeqAIJ_{A,B}_Private macros */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift = a->indexshift;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are silently skipped */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row owned by this processor */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          /* column in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;  /* negative column indices are skipped */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          /* column in the off-diagonal block; translate global -> local column */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            /* table stores local column + 1, so 0 means "not found" */
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
	    col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-diagonal column not in B's current structure:
                 convert B back to global column numbering so it can grow */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];  /* before first assembly B uses global column numbers */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned elsewhere: stash the whole row slice for assembly time */
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   MatAssemblyEnd_MPIAIJ - completes assembly: drains the stash of values
   destined for this processor (inserting them via MatSetValues_MPIAIJ),
   assembles both sequential blocks, and on the first final assembly sets up
   the communication needed for MatMult (see MatSetUpMultiply_MPIAIJ).
   Collective: the MPI_Allreduce below must be matched on every processor.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive batches of stashed values until no messages remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of the was_assembled flags is zero iff some processor disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* any cached MatGetRow workspace is invalid once the matrix changes */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
420 int MatZeroEntries_MPIAIJ(Mat A)
421 {
422   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423   int        ierr;
424 
425   PetscFunctionBegin;
426   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
427   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   MatZeroRows_MPIAIJ - zeros the (globally numbered) rows listed in `is',
   optionally placing *diag on the diagonal of each zeroed row.  Each
   processor may list rows owned by any processor; the routine first routes
   every requested row index to its owner with nonblocking MPI sends and
   receives, then zeroes the locally owned rows.
   Collective on A->comm.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /*  (nprocs[j] = #rows destined for proc j; procs[j] = 1 iff any) */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  /* one slot of nmax ints per expected incoming message */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices grouped by destination processor */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (the packing loop above advanced it) and fire the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* lrows[] holds the local indices of all rows this processor must zero */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert the diagonal
       entries one at a time through the full parallel MatSetValues path */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatMult_MPIAIJ"
585 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
586 {
587   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
588   int        ierr,nt;
589 
590   PetscFunctionBegin;
591   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
592   if (nt != A->n) {
593     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
594   }
595   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
596   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
597   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
599   PetscFunctionReturn(0);
600 }
601 
602 #undef __FUNCT__
603 #define __FUNCT__ "MatMultAdd_MPIAIJ"
604 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
605 {
606   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
607   int        ierr;
608 
609   PetscFunctionBegin;
610   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
611   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
612   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
613   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 
617 #undef __FUNCT__
618 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
619 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
620 {
621   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
622   int        ierr;
623 
624   PetscFunctionBegin;
625   /* do nondiagonal part */
626   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
627   /* send it on its way */
628   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
629   /* do local part */
630   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
631   /* receive remote parts: note this assumes the values are not actually */
632   /* inserted in yy until the next line, which is true for my implementation*/
633   /* but is not perhaps always true. */
634   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
635   PetscFunctionReturn(0);
636 }
637 
638 #undef __FUNCT__
639 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
640 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
641 {
642   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
643   int        ierr;
644 
645   PetscFunctionBegin;
646   /* do nondiagonal part */
647   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
648   /* send it on its way */
649   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
650   /* do local part */
651   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
652   /* receive remote parts: note this assumes the values are not actually */
653   /* inserted in yy until the next line, which is true for my implementation*/
654   /* but is not perhaps always true. */
655   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
656   PetscFunctionReturn(0);
657 }
658 
659 /*
660   This only works correctly for square matrices where the subblock A->A is the
661    diagonal block
662 */
663 #undef __FUNCT__
664 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
665 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
666 {
667   int        ierr;
668   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
669 
670   PetscFunctionBegin;
671   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
672   if (a->rstart != a->cstart || a->rend != a->cend) {
673     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
674   }
675   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatScale_MPIAIJ"
681 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
682 {
683   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
684   int        ierr;
685 
686   PetscFunctionBegin;
687   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
688   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatDestroy_MPIAIJ"
694 int MatDestroy_MPIAIJ(Mat mat)
695 {
696   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
697   int        ierr;
698 
699   PetscFunctionBegin;
700 #if defined(PETSC_USE_LOG)
701   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
702 #endif
703   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
704   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
705   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
706   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
707 #if defined (PETSC_USE_CTABLE)
708   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
709 #else
710   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
711 #endif
712   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
713   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
714   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
715   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
716   ierr = PetscFree(aij);CHKERRQ(ierr);
717   PetscFunctionReturn(0);
718 }
719 
720 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
721 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
725 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
726 {
727   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
728   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
729   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
730   PetscTruth        isdraw,isascii,flg;
731   PetscViewer       sviewer;
732   PetscViewerFormat format;
733 
734   PetscFunctionBegin;
735   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
736   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
737   if (isascii) {
738     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
739     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
740       MatInfo info;
741       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
742       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
743       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
744       if (flg) {
745         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
746 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
747       } else {
748         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
749 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
750       }
751       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
752       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
753       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
754       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
755       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
756       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
757       PetscFunctionReturn(0);
758     } else if (format == PETSC_VIEWER_ASCII_INFO) {
759       PetscFunctionReturn(0);
760     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
761 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
762       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
763 #endif
764 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
765       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
766 #endif
767 
768       PetscFunctionReturn(0);
769     }
770   } else if (isdraw) {
771     PetscDraw       draw;
772     PetscTruth isnull;
773     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
774     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
775   }
776 
777   if (size == 1) {
778     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
779     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
780   } else {
781     /* assemble the entire matrix onto first processor. */
782     Mat         A;
783     Mat_SeqAIJ *Aloc;
784     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
785     PetscScalar *a;
786 
787     if (!rank) {
788       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
789     } else {
790       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
791     }
792     PetscLogObjectParent(mat,A);
793 
794     /* copy over the A part */
795     Aloc = (Mat_SeqAIJ*)aij->A->data;
796     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
797     row = aij->rstart;
798     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
799     for (i=0; i<m; i++) {
800       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
801       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
802     }
803     aj = Aloc->j;
804     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
805 
806     /* copy over the B part */
807     Aloc = (Mat_SeqAIJ*)aij->B->data;
808     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
809     row  = aij->rstart;
810     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
811     ct   = cols;
812     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
813     for (i=0; i<m; i++) {
814       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
815       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
816     }
817     ierr = PetscFree(ct);CHKERRQ(ierr);
818     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
820     /*
821        Everyone has to call to draw the matrix since the graphics waits are
822        synchronized across all processors that share the PetscDraw object
823     */
824     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
825     if (!rank) {
826       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
827       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
828     }
829     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
830     ierr = MatDestroy(A);CHKERRQ(ierr);
831   }
832   PetscFunctionReturn(0);
833 }
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatView_MPIAIJ"
837 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
838 {
839   int        ierr;
840   PetscTruth isascii,isdraw,issocket,isbinary;
841 
842   PetscFunctionBegin;
843   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
844   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
845   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
846   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
847   if (isascii || isdraw || isbinary || issocket) {
848     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
849   } else {
850     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 
856 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

   Only the "local" sweep variants are supported (SOR_LOCAL_SYMMETRIC_SWEEP,
   SOR_LOCAL_FORWARD_SWEEP, SOR_LOCAL_BACKWARD_SWEEP).  Each outer iteration
   gathers the off-process entries of xx into mat->lvec, forms the modified
   right-hand side bb1 = bb - B*x, and then runs the sequential relaxation of
   the diagonal block A on bb1.  True parallel SOR is not implemented.

   Input Parameters:
     matin  - the matrix
     bb     - right-hand side
     omega  - relaxation factor
     flag   - SOR flags (sweep type, SOR_ZERO_INITIAL_GUESS, ...)
     fshift - diagonal shift
     its    - number of outer (parallel) iterations
     lits   - number of local iterations per outer iteration
   Output Parameter:
     xx     - the iterate, updated in place
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  int          ierr;
  Vec          bb1;        /* work vector holding bb - B*x */
  PetscScalar  mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess B*x = 0, so the first sweep needs no scatter */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather the off-process part of xx needed by the local B block */
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): PETSC_NULL is passed in the lits slot here (the symmetric
         branch passes lits) -- looks suspicious; confirm against the relax
         implementation before changing */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatGetInfo_MPIAIJ"
933 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
934 {
935   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
936   Mat        A = mat->A,B = mat->B;
937   int        ierr;
938   PetscReal  isend[5],irecv[5];
939 
940   PetscFunctionBegin;
941   info->block_size     = 1.0;
942   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
943   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
944   isend[3] = info->memory;  isend[4] = info->mallocs;
945   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
946   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
947   isend[3] += info->memory;  isend[4] += info->mallocs;
948   if (flag == MAT_LOCAL) {
949     info->nz_used      = isend[0];
950     info->nz_allocated = isend[1];
951     info->nz_unneeded  = isend[2];
952     info->memory       = isend[3];
953     info->mallocs      = isend[4];
954   } else if (flag == MAT_GLOBAL_MAX) {
955     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
956     info->nz_used      = irecv[0];
957     info->nz_allocated = irecv[1];
958     info->nz_unneeded  = irecv[2];
959     info->memory       = irecv[3];
960     info->mallocs      = irecv[4];
961   } else if (flag == MAT_GLOBAL_SUM) {
962     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
963     info->nz_used      = irecv[0];
964     info->nz_allocated = irecv[1];
965     info->nz_unneeded  = irecv[2];
966     info->memory       = irecv[3];
967     info->mallocs      = irecv[4];
968   }
969   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
970   info->fill_ratio_needed = 0;
971   info->factor_mallocs    = 0;
972   info->rows_global       = (double)matin->M;
973   info->columns_global    = (double)matin->N;
974   info->rows_local        = (double)matin->m;
975   info->columns_local     = (double)matin->N;
976 
977   PetscFunctionReturn(0);
978 }
979 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Sets a MatOption on an MPIAIJ matrix.

   Most options are simply forwarded to both sequential blocks (the
   diagonal part a->A and the off-diagonal part a->B); orientation and
   stashing options additionally update the parallel wrapper's own state.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that only affect the sequential blocks: forward to both */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* record orientation locally as well as in the blocks */
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  /* options with no effect on this format */
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* do not stash values destined for other processes during assembly */
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1028 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally-owned row of the matrix in global
   column numbering.

   The row is assembled by merging the row of the diagonal block A (whose
   columns are local and get cstart added) with the row of the off-diagonal
   block B (whose columns are translated through the garray map).  Since
   every B column is either entirely left of cstart or entirely right of the
   diagonal range, the merge splits B's row at the first column >= cstart
   (index "imark") and places A's row in between.

   Only one row may be outstanding at a time; MatRestoreRow_MPIAIJ() must be
   called before the next MatGetRow.  The returned arrays live in workspace
   cached on the matrix (mat->rowvalues / mat->rowindices).
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int     max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both the values and (after them) the indices */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* pass NULL for whichever outputs the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        /* copy B's entries left of the diagonal range, then A, then B's right part */
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's columns are local: shift by cstart to get global numbering */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1111 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1112 {
1113   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1114 
1115   PetscFunctionBegin;
1116   if (aij->getrowactive == PETSC_FALSE) {
1117     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1118   }
1119   aij->getrowactive = PETSC_FALSE;
1120   PetscFunctionReturn(0);
1121 }
1122 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes a matrix norm of an MPIAIJ matrix.

   Supports NORM_FROBENIUS (sum of squares reduced with MPI_SUM, then sqrt),
   NORM_1 (max column sum: per-process column sums accumulated into a
   global-length array and reduced with MPI_SUM) and NORM_INFINITY (max row
   sum, reduced with MPI_MAX).  The two-norm is not supported.  On a single
   process the computation is delegated to the sequential block.

   NOTE: "shift" is amat->indexshift, used to correct the stored column/row
   offsets of both blocks -- presumably 0 or -1 depending on the indexing
   convention of the sequential AIJ storage; confirm against Mat_SeqAIJ.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal    sum = 0.0;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then reduce and take sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int    *jj,*garray = aij->garray;
      /* tmp holds this process's contribution to every global column sum */
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* A block: local column + cstart gives the global column */
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* B block: compressed column translated through garray */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        /* row sum = |A row j| + |B row j|; rows of A and B are aligned */
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1198 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

   A new N x M parallel matrix B is built by inserting each local row as a
   column of B (MatSetValues with the row/column roles swapped); the stash
   machinery routes the entries to their owning processes during assembly.
   If matout is NULL the transpose replaces A in place (square matrices
   only, via MatHeaderCopy); otherwise it is returned in *matout.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
  int          ierr,shift = Aloc->indexshift;
  int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat          B;
  PetscScalar  *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily convert A's column indices to global numbering (undone below) */
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    /* insert row "row" of A as column "row" of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* restore A's column indices to their stored (local) numbering */
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;    /* keep the base pointer for PetscFree; cols is advanced below */
  /* translate B's compressed column indices to global numbering via garray */
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: replace A's guts with B's, destroying the old contents */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1250 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr).

   Either vector may be NULL to skip that side.  The scatter of rr into the
   ghosted vector lvec is started first and completed only after the left
   scaling and the diagonal-block scaling, deliberately overlapping
   communication with computation; do not reorder these calls.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling of the off-diagonal block needs only local entries of ll */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1283 
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1287 int MatPrintHelp_MPIAIJ(Mat A)
1288 {
1289   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1290   int        ierr;
1291 
1292   PetscFunctionBegin;
1293   if (!a->rank) {
1294     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1295   }
1296   PetscFunctionReturn(0);
1297 }
1298 
#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
/* MatGetBlockSize_MPIAIJ - AIJ is a pointwise format; the block size is always 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1309 int MatSetUnfactored_MPIAIJ(Mat A)
1310 {
1311   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1312   int        ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatEqual_MPIAIJ"
1321 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1322 {
1323   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1324   Mat        a,b,c,d;
1325   PetscTruth flg;
1326   int        ierr;
1327 
1328   PetscFunctionBegin;
1329   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1330   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1331   a = matA->A; b = matA->B;
1332   c = matB->A; d = matB->B;
1333 
1334   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1335   if (flg == PETSC_TRUE) {
1336     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1337   }
1338   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatCopy_MPIAIJ"
1344 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1345 {
1346   int        ierr;
1347   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1348   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1349   PetscTruth flg;
1350 
1351   PetscFunctionBegin;
1352   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1353   if (str != SAME_NONZERO_PATTERN || !flg) {
1354     /* because of the column compression in the off-processor part of the matrix a->B,
1355        the number of columns in a->B and b->B may be different, hence we cannot call
1356        the MatCopy() directly on the two parts. If need be, we can provide a more
1357        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1358        then copying the submatrices */
1359     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1360   } else {
1361     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1362     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1369 int MatSetUpPreallocation_MPIAIJ(Mat A)
1370 {
1371   int        ierr;
1372 
1373   PetscFunctionBegin;
1374   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1379 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1380 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1381 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1382 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1383 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1384 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1385 #endif
1386 
1387 #include "petscblaslapack.h"
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatAXPY_MPIAIJ"
1391 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1392 {
1393   int        ierr,one;
1394   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1395   Mat_SeqAIJ *x,*y;
1396 
1397   PetscFunctionBegin;
1398   if (str == SAME_NONZERO_PATTERN) {
1399     x  = (Mat_SeqAIJ *)xx->A->data;
1400     y  = (Mat_SeqAIJ *)yy->A->data;
1401     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1402     x  = (Mat_SeqAIJ *)xx->B->data;
1403     y  = (Mat_SeqAIJ *)yy->B->data;
1404     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1405   } else {
1406     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 /* -------------------------------------------------------------------*/
/*
   Function (dispatch) table for the MPIAIJ matrix type.  The entries are
   positional: each slot corresponds to a fixed operation in struct _MatOps
   and a 0 marks an operation MPIAIJ does not implement.  Do NOT reorder
   the entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
/* TFS-based parallel symbolic LU is only available for real double-precision builds */
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};
1492 
1493 /* ----------------------------------------------------------------------------------------*/
1494 
1495 EXTERN_C_BEGIN
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1498 int MatStoreValues_MPIAIJ(Mat mat)
1499 {
1500   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1501   int        ierr;
1502 
1503   PetscFunctionBegin;
1504   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1505   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1506   PetscFunctionReturn(0);
1507 }
1508 EXTERN_C_END
1509 
1510 EXTERN_C_BEGIN
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1513 int MatRetrieveValues_MPIAIJ(Mat mat)
1514 {
1515   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1516   int        ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1520   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1521   PetscFunctionReturn(0);
1522 }
1523 EXTERN_C_END
1524 
1525 #include "petscpc.h"
1526 EXTERN_C_BEGIN
1527 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1528 EXTERN_C_END
1529 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Constructor registered for the MPIAIJ type.

   Allocates and zeroes the Mat_MPIAIJ container, installs the function
   table, splits the global sizes over the communicator, builds the
   row/column ownership tables (rowners/cowners), creates the stash used
   for off-process entries, and composes the store/retrieve/diagonal-block
   query functions.  The sequential blocks A and B are created later by
   the preallocation routine, not here.
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* fill in any PETSC_DECIDE local/global sizes consistently across processes */
  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation holds both tables: cowners follows rowners */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every process's local row count, then prefix-sum into ownership ranges;
     the loop starts at 2 because rowners[0]=0 and rowners[1] (rank 0's count) are already final */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same construction for the column ownership ranges */
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
1609 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same parallel
   layout and communication structures as matin; cpvalues is forwarded to
   MatDuplicate() on the two sequential blocks and controls whether the
   numerical values are copied as well.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  /* install the complete MPIAIJ operations table on the copy */
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;    /* the duplicate is immediately usable */
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  /* copy the parallel layout (ownership ranges and communicator position) */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch state is never shared; start fresh */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* 2*(size+2) ints: assumes rowners and cowners are stored contiguously
     in Mat_MPIAIJ so one memcpy copies both tables -- confirm in the header */
  ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  /* duplicate the global-to-local column map of the off-diagonal block */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* duplicate the local-to-global column map (one entry per B column) */
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the matrix-vector multiply work vector and scatter context */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  /* duplicate the diagonal (A) and off-diagonal (B) sequential blocks */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
1672 
1673 #include "petscsys.h"
1674 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format and
   distributes it as an MPIAIJ matrix.  Only process 0 reads the file; it
   scatters the row lengths and ships each process its column indices and
   values via point-to-point messages, after which every process inserts
   its own rows and the matrix is assembled.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* process 0 validates the binary header (cookie, no special format) */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  /* broadcast M, N, nz (header[1..3]) to all processes */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: spread M rows as evenly as possible */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* convert per-process counts into a prefix sum of ownership boundaries */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens (off-diagonal
     counts per row) shares the same allocation as ourlens */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off; the file
       stores rows in order, so process i's chunk is read in rank order */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices; verify the expected count arrived */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    /* square matrix: column ownership mirrors row ownership */
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries
     (columns outside [cstart,cend)) for correct preallocation */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily shrink ourlens to the diagonal-block
     counts for preallocation, then restore the totals for insertion */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* file stores each row's columns sorted; tell MatSetValues() so */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix, one owned global row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values; verify the expected count arrived */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1859 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

    csize is the number of columns this process should own in the result
  (or PETSC_DECIDE); with MAT_REUSE_MATRIX the previously created matrix
  in *newmat is refilled instead of allocating a new one.
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat          *local,M,Mreuse;
  PetscScalar  *vwork,*aa;
  MPI_Comm     comm = mat->comm;
  Mat_SeqAIJ   *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* retrieve the sequential submatrix cached on the previous result */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    /* only the array wrapper is freed; Mreuse itself is kept */
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* even split of the n columns across processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of nlocal gives this process's column ownership range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths (olens shares dlens' allocation) */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* walk the CSR arrays of the sequential submatrix, inserting each local
     row i as global row rstart+i of the parallel result */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    /* compose took a reference; drop ours so M solely owns Mreuse */
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1983 
1984 #undef __FUNCT__
1985 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1986 /*@C
1987    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1988    (the default parallel PETSc format).  For good matrix assembly performance
1989    the user should preallocate the matrix storage by setting the parameters
1990    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1991    performance can be increased by more than a factor of 50.
1992 
1993    Collective on MPI_Comm
1994 
1995    Input Parameters:
1996 +  A - the matrix
1997 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1998            (same value is used for all local rows)
1999 .  d_nnz - array containing the number of nonzeros in the various rows of the
2000            DIAGONAL portion of the local submatrix (possibly different for each row)
2001            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2002            The size of this array is equal to the number of local rows, i.e 'm'.
2003            You must leave room for the diagonal entry even if it is zero.
2004 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2005            submatrix (same value is used for all local rows).
2006 -  o_nnz - array containing the number of nonzeros in the various rows of the
2007            OFF-DIAGONAL portion of the local submatrix (possibly different for
2008            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2009            structure. The size of this array is equal to the number
2010            of local rows, i.e 'm'.
2011 
2012    The AIJ format (also called the Yale sparse matrix format or
2013    compressed row storage), is fully compatible with standard Fortran 77
2014    storage.  That is, the stored row and column indices can begin at
2015    either one (as in Fortran) or zero.  See the users manual for details.
2016 
2017    The user MUST specify either the local or global matrix dimensions
2018    (possibly both).
2019 
2020    The parallel matrix is partitioned such that the first m0 rows belong to
2021    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2022    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2023 
2024    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
2026    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2027    first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
2029    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2030 
2031    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2032 
2033    By default, this format uses inodes (identical nodes) when possible.
2034    We search for consecutive rows with the same nonzero structure, thereby
2035    reusing matrix information to achieve increased efficiency.
2036 
2037    Options Database Keys:
2038 +  -mat_aij_no_inode  - Do not use inodes
2039 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2040 -  -mat_aij_oneindex - Internally use indexing starting at 1
2041         rather than 0.  Note that when calling MatSetValues(),
2042         the user still MUST index entries starting at 0!
2043 
2044    Example usage:
2045 
2046    Consider the following 8x8 matrix with 34 non-zero values, that is
2047    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2048    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2049    as follows:
2050 
2051 .vb
2052             1  2  0  |  0  3  0  |  0  4
2053     Proc0   0  5  6  |  7  0  0  |  8  0
2054             9  0 10  | 11  0  0  | 12  0
2055     -------------------------------------
2056            13  0 14  | 15 16 17  |  0  0
2057     Proc1   0 18  0  | 19 20 21  |  0  0
2058             0  0  0  | 22 23  0  | 24  0
2059     -------------------------------------
2060     Proc2  25 26 27  |  0  0 28  | 29  0
2061            30  0  0  | 31 32 33  |  0 34
2062 .ve
2063 
2064    This can be represented as a collection of submatrices as:
2065 
2066 .vb
2067       A B C
2068       D E F
2069       G H I
2070 .ve
2071 
2072    Where the submatrices A,B,C are owned by proc0, D,E,F are
2073    owned by proc1, G,H,I are owned by proc2.
2074 
2075    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2076    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2077    The 'M','N' parameters are 8,8, and have the same values on all procs.
2078 
2079    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2080    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2081    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2082    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2083    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2085 
2086    When d_nz, o_nz parameters are specified, d_nz storage elements are
2087    allocated for every row of the local diagonal submatrix, and o_nz
2088    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
2090    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2091    In this case, the values of d_nz,o_nz are:
2092 .vb
2093      proc0 : dnz = 2, o_nz = 2
2094      proc1 : dnz = 3, o_nz = 2
2095      proc2 : dnz = 1, o_nz = 4
2096 .ve
2097    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2098    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
2100    34 values.
2101 
2102    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2104    In the above case the values for d_nnz,o_nnz are:
2105 .vb
2106      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2107      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2108      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2109 .ve
2110    Here the space allocated is sum of all the above values i.e 34, and
2111    hence pre-allocation is perfect.
2112 
2113    Level: intermediate
2114 
2115 .keywords: matrix, aij, compressed row, sparse, parallel
2116 
2117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2118 @*/
2119 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2120 {
2121   Mat_MPIAIJ   *b;
2122   int          ierr,i;
2123   PetscTruth   flg2;
2124 
2125   PetscFunctionBegin;
2126   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2127   if (!flg2) PetscFunctionReturn(0);
2128   B->preallocated = PETSC_TRUE;
2129   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2130   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2131   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2132   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2133   if (d_nnz) {
2134     for (i=0; i<B->m; i++) {
2135       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2136     }
2137   }
2138   if (o_nnz) {
2139     for (i=0; i<B->m; i++) {
2140       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2141     }
2142   }
2143   b = (Mat_MPIAIJ*)B->data;
2144 
2145   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2146   PetscLogObjectParent(B,b->A);
2147   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2148   PetscLogObjectParent(B,b->B);
2149 
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatCreateMPIAIJ"
2155 /*@C
2156    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2157    (the default parallel PETSc format).  For good matrix assembly performance
2158    the user should preallocate the matrix storage by setting the parameters
2159    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2160    performance can be increased by more than a factor of 50.
2161 
2162    Collective on MPI_Comm
2163 
2164    Input Parameters:
2165 +  comm - MPI communicator
2166 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2167            This value should be the same as the local size used in creating the
2168            y vector for the matrix-vector product y = Ax.
2169 .  n - This value should be the same as the local size used in creating the
2170        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2171        calculated if N is given) For square matrices n is almost always m.
2172 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2173 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2174 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2175            (same value is used for all local rows)
2176 .  d_nnz - array containing the number of nonzeros in the various rows of the
2177            DIAGONAL portion of the local submatrix (possibly different for each row)
2178            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2179            The size of this array is equal to the number of local rows, i.e 'm'.
2180            You must leave room for the diagonal entry even if it is zero.
2181 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2182            submatrix (same value is used for all local rows).
2183 -  o_nnz - array containing the number of nonzeros in the various rows of the
2184            OFF-DIAGONAL portion of the local submatrix (possibly different for
2185            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2186            structure. The size of this array is equal to the number
2187            of local rows, i.e 'm'.
2188 
2189    Output Parameter:
2190 .  A - the matrix
2191 
2192    Notes:
2193    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2194    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2195    storage requirements for this matrix.
2196 
2197    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2198    processor than it must be used on all processors that share the object for
2199    that argument.
2200 
2201    The AIJ format (also called the Yale sparse matrix format or
2202    compressed row storage), is fully compatible with standard Fortran 77
2203    storage.  That is, the stored row and column indices can begin at
2204    either one (as in Fortran) or zero.  See the users manual for details.
2205 
2206    The user MUST specify either the local or global matrix dimensions
2207    (possibly both).
2208 
2209    The parallel matrix is partitioned such that the first m0 rows belong to
2210    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2211    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2212 
2213    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
2215    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2216    first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
2218    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2219 
2220    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2221 
2222    By default, this format uses inodes (identical nodes) when possible.
2223    We search for consecutive rows with the same nonzero structure, thereby
2224    reusing matrix information to achieve increased efficiency.
2225 
2226    Options Database Keys:
2227 +  -mat_aij_no_inode  - Do not use inodes
2228 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2229 -  -mat_aij_oneindex - Internally use indexing starting at 1
2230         rather than 0.  Note that when calling MatSetValues(),
2231         the user still MUST index entries starting at 0!
2232 
2233 
2234    Example usage:
2235 
2236    Consider the following 8x8 matrix with 34 non-zero values, that is
2237    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2238    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2239    as follows:
2240 
2241 .vb
2242             1  2  0  |  0  3  0  |  0  4
2243     Proc0   0  5  6  |  7  0  0  |  8  0
2244             9  0 10  | 11  0  0  | 12  0
2245     -------------------------------------
2246            13  0 14  | 15 16 17  |  0  0
2247     Proc1   0 18  0  | 19 20 21  |  0  0
2248             0  0  0  | 22 23  0  | 24  0
2249     -------------------------------------
2250     Proc2  25 26 27  |  0  0 28  | 29  0
2251            30  0  0  | 31 32 33  |  0 34
2252 .ve
2253 
2254    This can be represented as a collection of submatrices as:
2255 
2256 .vb
2257       A B C
2258       D E F
2259       G H I
2260 .ve
2261 
2262    Where the submatrices A,B,C are owned by proc0, D,E,F are
2263    owned by proc1, G,H,I are owned by proc2.
2264 
2265    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2266    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2267    The 'M','N' parameters are 8,8, and have the same values on all procs.
2268 
2269    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2270    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2271    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2272    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2273    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2275 
2276    When d_nz, o_nz parameters are specified, d_nz storage elements are
2277    allocated for every row of the local diagonal submatrix, and o_nz
2278    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
2280    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2281    In this case, the values of d_nz,o_nz are:
2282 .vb
2283      proc0 : dnz = 2, o_nz = 2
2284      proc1 : dnz = 3, o_nz = 2
2285      proc2 : dnz = 1, o_nz = 4
2286 .ve
2287    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2288    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
2290    34 values.
2291 
2292    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2294    In the above case the values for d_nnz,o_nnz are:
2295 .vb
2296      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2297      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2298      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2299 .ve
2300    Here the space allocated is sum of all the above values i.e 34, and
2301    hence pre-allocation is perfect.
2302 
2303    Level: intermediate
2304 
2305 .keywords: matrix, aij, compressed row, sparse, parallel
2306 
2307 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2308 @*/
2309 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2310 {
2311   int ierr,size;
2312 
2313   PetscFunctionBegin;
2314   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2315   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2316   if (size > 1) {
2317     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2318     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2319   } else {
2320     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2321     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2322   }
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 #undef __FUNCT__
2327 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2328 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2329 {
2330   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2331   PetscFunctionBegin;
2332   *Ad     = a->A;
2333   *Ao     = a->B;
2334   *colmap = a->garray;
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2340 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2341 {
2342   int        ierr;
2343   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2344 
2345   PetscFunctionBegin;
2346   if (coloring->ctype == IS_COLORING_LOCAL) {
2347     int        *allcolors,*colors,i;
2348     ISColoring ocoloring;
2349 
2350     /* set coloring for diagonal portion */
2351     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2352 
2353     /* set coloring for off-diagonal portion */
2354     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2355     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2356     for (i=0; i<a->B->n; i++) {
2357       colors[i] = allcolors[a->garray[i]];
2358     }
2359     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2360     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2361     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2362     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2363   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2364     int        *colors,i,*larray;
2365     ISColoring ocoloring;
2366 
2367     /* set coloring for diagonal portion */
2368     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2369     for (i=0; i<a->A->n; i++) {
2370       larray[i] = i + a->cstart;
2371     }
2372     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2373     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2374     for (i=0; i<a->A->n; i++) {
2375       colors[i] = coloring->colors[larray[i]];
2376     }
2377     ierr = PetscFree(larray);CHKERRQ(ierr);
2378     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2379     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2380     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2381 
2382     /* set coloring for off-diagonal portion */
2383     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2384     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2385     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2386     for (i=0; i<a->B->n; i++) {
2387       colors[i] = coloring->colors[larray[i]];
2388     }
2389     ierr = PetscFree(larray);CHKERRQ(ierr);
2390     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2391     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2392     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2393   } else {
2394     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2395   }
2396 
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2402 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2403 {
2404   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2405   int        ierr;
2406 
2407   PetscFunctionBegin;
2408   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2409   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2415 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2416 {
2417   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2418   int        ierr;
2419 
2420   PetscFunctionBegin;
2421   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2422   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatFileMerge"
2428 /*@C
2429       MatFileMerge - Saves a single large PETSc matrix by reading in
2430             sequential matrices from each processor and concatinating them
2431             together.
2432 
2433     Collective on MPI_Comm
2434 
2435    Input Parameters:
2436 +    comm - the communicators the individual files are on
2437 .    infile - name of input file
2438 -    outfile - name of output file
2439 
2440     Level: advanced
2441 
2442    Notes: The number of columns of the matrix in EACH of the seperate files
2443       MUST be the same.
2444 
2445           The true name of the input files on each processor is obtained by
2446       appending .rank onto the infile string; where rank is 0 up to one less
2447       then the number of processors
2448 @*/
2449 int MatFileMerge(MPI_Comm comm,char *infile,char *outfile)
2450 {
2451   int         ierr,rank,len,m,n,i,rstart,*indx,nnz,I;
2452   PetscViewer in,out;
2453   char        *name;
2454   Mat         A,B;
2455   PetscScalar *values;
2456 
2457   PetscFunctionBegin;
2458   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2459   ierr = PetscStrlen(infile,&len);CHKERRQ(ierr);
2460   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2461   sprintf(name,"%s.%d",infile,rank);
2462   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_RDONLY,&in);CHKERRQ(ierr);
2463   ierr = PetscFree(name);
2464   ierr = MatLoad(in,MATSEQAIJ,&A);CHKERRQ(ierr);
2465   ierr = PetscViewerDestroy(in);CHKERRQ(ierr);
2466 
2467   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
2468   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DETERMINE,n,0,0,0,0,&B);CHKERRQ(ierr);
2469   ierr = MatGetOwnershipRange(B,&rstart,0);CHKERRQ(ierr);
2470   for (i=0;i<m;i++) {
2471     ierr = MatGetRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr);
2472     I    = i + rstart;
2473     ierr = MatSetValues(B,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2474     ierr = MatRestoreRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr);
2475   }
2476   ierr = MatDestroy(A);CHKERRQ(ierr);
2477   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2478   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2479 
2480   ierr = PetscViewerBinaryOpen(comm,outfile,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2481   ierr = MatView(B,out);CHKERRQ(ierr);
2482   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2483   ierr = MatDestroy(B);CHKERRQ(ierr);
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatFileSplit"
2489 int MatFileSplit(Mat A,char *outfile)
2490 {
2491   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2492   PetscViewer out;
2493   char        *name;
2494   Mat         B;
2495   PetscScalar *values;
2496 
2497   PetscFunctionBegin;
2498 
2499   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2500   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2501   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2502   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2503   for (i=0;i<m;i++) {
2504     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2505     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2506     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2507   }
2508   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2509   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2510 
2511   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2512   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2513   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2514   sprintf(name,"%s.%d",outfile,rank);
2515   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2516   ierr = PetscFree(name);
2517   ierr = MatView(B,out);CHKERRQ(ierr);
2518   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2519   ierr = MatDestroy(B);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522