xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 456192e25527e6dfd94efe19f31bdabe85ed0893)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array, but is fast to access).
*/
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
#define CHUNKSIZE   15
/*
   Inlined insertion of a single (row,col,value) entry into the "diagonal"
   SeqAIJ block A.  Relies on the enclosing scope to supply the working
   variables (rp,ap,rmax,nrow,col1,low,high,t,_i,ii,N,ierr,nonew,shift) and
   the cached arrays ai,aj,aa,aimax,ailen.  If the row has no free slot the
   whole matrix storage is reallocated with CHUNKSIZE extra entries for
   this row.

   NOTE(fix): the binary-search narrowing now compares against the shifted
   column col1 (= col - shift), matching the linear scan below and the
   values actually stored in rp[]; comparing against the unshifted col
   could exclude the matching entry when indexshift != 0 (e.g. Fortran
   1-based indexing) and insert a duplicate.  Behavior is unchanged when
   shift == 0.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
           ierr = PetscFree(a->i);CHKERRQ(ierr); \
           ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow; \
}
117 
/*
   Inlined insertion of a single (row,col,value) entry into the
   "off-diagonal" SeqAIJ block B.  Mirrors MatSetValues_SeqAIJ_A_Private;
   the enclosing scope supplies the working variables and the cached arrays
   bi,bj,ba,bimax,bilen.

   NOTE(fix): as in the A variant, the binary-search narrowing now compares
   against the shifted column col1 (= col - shift) so the search agrees
   with the linear scan and with the values stored in rp[] when
   indexshift != 0.  Behavior is unchanged when shift == 0.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
       for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow; \
}
191 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts or adds an m-by-n logically dense block of values v into the
   parallel AIJ matrix.  Rows owned by this process are stored immediately:
   columns in [cstart,cend) go into the diagonal block aij->A, all other
   columns into the off-diagonal block aij->B (via the inlined macros above).
   Rows owned by other processes are placed in the stash and communicated at
   assembly time.  Negative row/column indices are silently skipped.
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  /* zero entries may only be dropped when adding; inserting a zero must
     still overwrite whatever is stored */
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift = a->indexshift;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          /* column lies in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          /* off-diagonal block: translate the global column into B's
             compressed local numbering via the colmap (built lazily) */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* a previously unseen off-diagonal column: B must be
               disassembled back to global column numbering first */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash the row segment for communication during
         MatAssemblyBegin/End (unless stashing is disabled) */
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
277 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values into v (row-major).  Only rows owned
   by this process may be requested; requesting an off-process row raises an
   error.  Entries whose column is not stored in the off-diagonal block are
   returned as 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: map global column to B's local numbering */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not present in the off-diagonal block => value is zero
             (the garray check guards against a stale/aliased colmap entry) */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash of off-process entries received from
   other ranks and inserts them locally, assembles both sequential blocks,
   and collectively decides whether the off-diagonal block must be
   disassembled (because some process now has entries in new off-diagonal
   columns) before the multiply machinery can be (re)built.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive messages of stashed entries until the scatter is exhausted */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 iff at least one rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build garray/lvec/Mvctx used by MatMult */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* work array cached for MatGetRow() is now stale; discard it */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
420 int MatZeroEntries_MPIAIJ(Mat A)
421 {
422   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423   int        ierr;
424 
425   PetscFunctionBegin;
426   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
427   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the (globally numbered) rows listed in is; if diag is non-null the
   diagonal entry of each zeroed row is set to *diag.  Any process may list
   any global row, so the routine first routes each requested row index to
   the process that owns it with a hand-rolled Isend/Irecv exchange, then
   zeroes the locally owned rows in both blocks.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #rows destined for rank j; procs[j] = 1 iff any row goes to j */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    /* linear scan of the ownership ranges to find the owner of row idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  /* PetscMaxSum_Op: work[rank] = max message length, work[size+rank] = #messages to me */
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack each row index into the slot for its owner (starts[] is consumed) */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (the packing loop above advanced it past each group) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert the received global row numbers to local numbering */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: let the SeqAIJ routine place the diagonal */
    ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular case: zero first, then insert the diagonal entries via
       MatSetValues (requires that new nonzero locations be allowed) */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
582 
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   yy = A*xx.  Overlaps communication with computation: start the scatter
   of the needed off-process entries of xx into a->lvec, compute the
   diagonal-block product while the messages are in flight, complete the
   scatter, then add the off-diagonal block's contribution.  The statement
   order is essential to this overlap.
*/
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
601 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/*
   zz = yy + A*xx.  Same communication/computation overlap as
   MatMult_MPIAIJ: scatter begin, local multadd, scatter end, then add the
   off-diagonal contribution from the gathered ghost values in a->lvec.
*/
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
616 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   yy = A'*xx.  The off-diagonal block's transpose product is computed
   first and its result scattered (reverse, adding) back to the owning
   processes while the local transpose product is computed.
*/
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
637 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/*
   zz = yy + A'*xx.  Mirrors MatMultTranspose_MPIAIJ: off-diagonal
   transpose product first, reverse-add scatter overlapped with the local
   transpose multadd.
*/
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
658 
659 /*
660   This only works correctly for square matrices where the subblock A->A is the
661    diagonal block
662 */
663 #undef __FUNCT__
664 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
665 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
666 {
667   int        ierr;
668   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
669 
670   PetscFunctionBegin;
671   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
672   if (a->rstart != a->cstart || a->rend != a->cend) {
673     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
674   }
675   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatScale_MPIAIJ"
681 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
682 {
683   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
684   int        ierr;
685 
686   PetscFunctionBegin;
687   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
688   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Destroys the parallel matrix: the stash, both sequential blocks, the
   column map (hash table or dense array depending on PETSC_USE_CTABLE),
   the garray global-column list, the ghost-value vector and scatter
   context, and finally the Mat_MPIAIJ structure itself.
*/
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
719 
720 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
721 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
725 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
726 {
727   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
728   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
729   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
730   PetscTruth        isdraw,isascii,flg;
731   PetscViewer       sviewer;
732   PetscViewerFormat format;
733 
734   PetscFunctionBegin;
735   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
736   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
737   if (isascii) {
738     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
739     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
740       MatInfo info;
741       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
742       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
743       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
744       if (flg) {
745         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
746 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
747       } else {
748         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
749 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
750       }
751       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
752       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
753       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
754       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
755       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
756       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
757       PetscFunctionReturn(0);
758     } else if (format == PETSC_VIEWER_ASCII_INFO) {
759       PetscFunctionReturn(0);
760     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
761 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
762       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
763 #endif
764 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
765       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
766 #endif
767 
768       PetscFunctionReturn(0);
769     }
770   } else if (isdraw) {
771     PetscDraw       draw;
772     PetscTruth isnull;
773     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
774     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
775   }
776 
777   if (size == 1) {
778     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
779     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
780   } else {
781     /* assemble the entire matrix onto first processor. */
782     Mat         A;
783     Mat_SeqAIJ *Aloc;
784     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
785     PetscScalar *a;
786 
787     if (!rank) {
788       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
789     } else {
790       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
791     }
792     PetscLogObjectParent(mat,A);
793 
794     /* copy over the A part */
795     Aloc = (Mat_SeqAIJ*)aij->A->data;
796     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
797     row = aij->rstart;
798     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
799     for (i=0; i<m; i++) {
800       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
801       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
802     }
803     aj = Aloc->j;
804     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
805 
806     /* copy over the B part */
807     Aloc = (Mat_SeqAIJ*)aij->B->data;
808     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
809     row  = aij->rstart;
810     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
811     ct   = cols;
812     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
813     for (i=0; i<m; i++) {
814       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
815       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
816     }
817     ierr = PetscFree(ct);CHKERRQ(ierr);
818     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
820     /*
821        Everyone has to call to draw the matrix since the graphics waits are
822        synchronized across all processors that share the PetscDraw object
823     */
824     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
825     if (!rank) {
826       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
827       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
828     }
829     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
830     ierr = MatDestroy(A);CHKERRQ(ierr);
831   }
832   PetscFunctionReturn(0);
833 }
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatView_MPIAIJ"
837 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
838 {
839   int        ierr;
840   PetscTruth isascii,isdraw,issocket,isbinary;
841 
842   PetscFunctionBegin;
843   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
844   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
845   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
846   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
847   if (isascii || isdraw || isbinary || issocket) {
848     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
849   } else {
850     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 
856 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

   Only the "local" sweep variants (SOR_LOCAL_SYMMETRIC_SWEEP,
   SOR_LOCAL_FORWARD_SWEEP, SOR_LOCAL_BACKWARD_SWEEP, optionally combined
   with SOR_ZERO_INITIAL_GUESS) are supported; a true parallel SOR is not.
   Each outer iteration scatters the current iterate xx into the ghost
   vector, forms the corrected right-hand side bb1 = bb - B*xx (where B is
   the off-diagonal block), and then relaxes with the local diagonal block A.

   Input:  bb - right-hand side; omega - relaxation factor; fshift -
           diagonal shift; its/lits - global/local iteration counts (both
           must be positive).
   Output: xx - updated iterate (also input guess unless
           SOR_ZERO_INITIAL_GUESS is set).
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  int          ierr;
  Vec          bb1;        /* work vector holding bb - B*xx */
  PetscScalar  mone=-1.0;  /* used to negate the ghost values before multadd */

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess, B*xx == 0, so one sweep directly on bb suffices.
         NOTE(review): `lits` is passed as BOTH the global and local iteration
         counts here -- confirm the first should not be 1 or `its` */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* bring ghost values of xx up to date */
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): PETSC_NULL (i.e. 0) is passed as the local iteration
         count here, unlike the symmetric branch which passes `lits` --
         confirm this asymmetry is intentional */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      /* bring ghost values of xx up to date */
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      /* bring ghost values of xx up to date */
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatGetInfo_MPIAIJ"
933 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
934 {
935   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
936   Mat        A = mat->A,B = mat->B;
937   int        ierr;
938   PetscReal  isend[5],irecv[5];
939 
940   PetscFunctionBegin;
941   info->block_size     = 1.0;
942   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
943   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
944   isend[3] = info->memory;  isend[4] = info->mallocs;
945   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
946   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
947   isend[3] += info->memory;  isend[4] += info->mallocs;
948   if (flag == MAT_LOCAL) {
949     info->nz_used      = isend[0];
950     info->nz_allocated = isend[1];
951     info->nz_unneeded  = isend[2];
952     info->memory       = isend[3];
953     info->mallocs      = isend[4];
954   } else if (flag == MAT_GLOBAL_MAX) {
955     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
956     info->nz_used      = irecv[0];
957     info->nz_allocated = irecv[1];
958     info->nz_unneeded  = irecv[2];
959     info->memory       = irecv[3];
960     info->mallocs      = irecv[4];
961   } else if (flag == MAT_GLOBAL_SUM) {
962     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
963     info->nz_used      = irecv[0];
964     info->nz_allocated = irecv[1];
965     info->nz_unneeded  = irecv[2];
966     info->memory       = irecv[3];
967     info->mallocs      = irecv[4];
968   }
969   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
970   info->fill_ratio_needed = 0;
971   info->factor_mallocs    = 0;
972   info->rows_global       = (double)matin->M;
973   info->columns_global    = (double)matin->N;
974   info->rows_local        = (double)matin->m;
975   info->columns_local     = (double)matin->N;
976 
977   PetscFunctionReturn(0);
978 }
979 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Applies a MatOption to an MPIAIJ matrix.

   Most options are simply forwarded to both the diagonal (a->A) and
   off-diagonal (a->B) sequential blocks; the orientation and stashing
   options additionally update the parallel wrapper's own state.  The case
   labels rely on fallthrough grouping, so their arrangement is significant.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to both sequential blocks */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  /* orientation: record locally (affects MatSetValues) and forward */
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  /* options with no effect on AIJ storage */
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  /* suppress communication of off-process values during assembly */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1028 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of an MPIAIJ matrix,
   with global column indices in increasing order.

   The row is assembled by merging the diagonal block's entries (whose
   local columns map to cstart..cend) with the off-diagonal block's entries
   (whose local columns map through garray to global columns either below
   cstart or above cend).  Because both blocks store their columns sorted,
   the merged order is: B-entries left of the diagonal block, then all
   A-entries, then B-entries right of it.

   Output arrays point into per-matrix scratch (rowvalues/rowindices) that
   is sized for the longest local row on first use; the caller must pair
   this with MatRestoreRow.  Only one row may be active at a time.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int     max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices aliased after them */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  /* NOTE(review): no trailing ';' after SETERRQ -- harmless because the
     macro expands to a braced block containing a return, but confirm */
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* request only the pieces the caller asked for from the two blocks */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1; /* number of B entries whose global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy B entries left of the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all A entries, then the remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested; find the split point now */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A columns are local to the diagonal block; offset by cstart */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1111 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1112 {
1113   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1114 
1115   PetscFunctionBegin;
1116   if (aij->getrowactive == PETSC_FALSE) {
1117     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1118   }
1119   aij->getrowactive = PETSC_FALSE;
1120   PetscFunctionReturn(0);
1121 }
1122 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes a norm of an MPIAIJ matrix.

   Supported: NORM_FROBENIUS (sum of squares reduced with MPI_SUM),
   NORM_1 (max column sum; each process accumulates into a full-length
   global-column array, which is then summed across processes -- note this
   uses O(N) storage per process), and NORM_INFINITY (max row sum, reduced
   with MPI_MAX).  The two-norm is not supported.  On one process the work
   is delegated to the sequential diagonal block.

   `shift` is the Seq AIJ index shift (0 or -1 for Fortran-style indexing)
   and must be applied when dereferencing i/j arrays.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal    sum = 0.0;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then reduce and take sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int    *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* diagonal block: local column j maps to global cstart + j */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      /* off-diagonal block: local column j maps through garray */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are not split across blocks, so the row sum is the sum of the
         A-row and B-row contributions */
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1198 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

   A new matrix B of global size N x M is created with the transposed
   parallel layout (A->n local rows); each local entry (row, col, val) of
   both blocks is inserted into B as (col, row, val) via MatSetValues, and
   assembly moves entries to their owning processes.  If matout is NULL the
   transpose replaces A in place (square matrices only).

   NOTE(review): the diagonal block's column indices aj[] are temporarily
   shifted to global numbering in place and restored afterwards -- the
   matrix is briefly in an inconsistent state, so this is not reentrant.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
  int          ierr,shift = Aloc->indexshift;
  int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat          B;
  PetscScalar  *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* no preallocation given; MatSetValues below will allocate as needed */
  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily convert local columns to global numbering */
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    /* insert row `row` of A as column `row` of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* restore the local column numbering */
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  /* off-diagonal columns are translated through garray into a scratch
     array instead of being shifted in place */
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols; /* keep the base pointer; `cols` is advanced in the loop */
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: take over B's guts and destroy the shell */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1250 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr).

   Left scaling (ll) acts on rows, which are wholly local, so it is applied
   directly to both blocks.  Right scaling (rr) acts on columns, so the
   off-diagonal block needs the ghost values of rr; the scatter that
   fetches them is started first and completed only after the diagonal
   block has been scaled, overlapping communication with computation.
   Either vector may be PETSC_NULL to skip that side.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local) */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1283 
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1287 int MatPrintHelp_MPIAIJ(Mat A)
1288 {
1289   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1290   int        ierr;
1291 
1292   PetscFunctionBegin;
1293   if (!a->rank) {
1294     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1295   }
1296   PetscFunctionReturn(0);
1297 }
1298 
#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
/*
   MatGetBlockSize_MPIAIJ - Reports the block size of an MPIAIJ matrix,
   which is always 1 since AIJ stores individual scalar entries.
*/
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1309 int MatSetUnfactored_MPIAIJ(Mat A)
1310 {
1311   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1312   int        ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatEqual_MPIAIJ"
1321 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1322 {
1323   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1324   Mat        a,b,c,d;
1325   PetscTruth flg;
1326   int        ierr;
1327 
1328   PetscFunctionBegin;
1329   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1330   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1331   a = matA->A; b = matA->B;
1332   c = matB->A; d = matB->B;
1333 
1334   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1335   if (flg == PETSC_TRUE) {
1336     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1337   }
1338   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatCopy_MPIAIJ"
1344 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1345 {
1346   int        ierr;
1347   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1348   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1349   PetscTruth flg;
1350 
1351   PetscFunctionBegin;
1352   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1353   if (str != SAME_NONZERO_PATTERN || !flg) {
1354     /* because of the column compression in the off-processor part of the matrix a->B,
1355        the number of columns in a->B and b->B may be different, hence we cannot call
1356        the MatCopy() directly on the two parts. If need be, we can provide a more
1357        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1358        then copying the submatrices */
1359     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1360   } else {
1361     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1362     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/*
   MatSetUpPreallocation_MPIAIJ - Default preallocation hook, invoked when
   the user never called MatMPIAIJSetPreallocation(); falls back to the
   default per-row estimates (PETSC_DEFAULT) with no explicit nnz arrays.
*/
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int        ierr;

  PetscFunctionBegin;
  ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1377 
1378 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1379 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1380 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1381 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1382 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1383 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1384 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1385 #endif
1386 
1387 #include "petscblaslapack.h"
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatAXPY_MPIAIJ"
1391 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1392 {
1393   int        ierr,one;
1394   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1395   Mat_SeqAIJ *x,*y;
1396 
1397   PetscFunctionBegin;
1398   if (str == SAME_NONZERO_PATTERN) {
1399     x  = (Mat_SeqAIJ *)xx->A->data;
1400     y  = (Mat_SeqAIJ *)yy->A->data;
1401     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1402     x  = (Mat_SeqAIJ *)xx->B->data;
1403     y  = (Mat_SeqAIJ *)yy->B->data;
1404     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1405   } else {
1406     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
/* -------------------------------------------------------------------*/
/*
   MatOps_Values - Function table for the MPIAIJ matrix type.

   Entries are POSITIONAL: each slot corresponds to a fixed member of
   struct _MatOps, and 0 marks an unimplemented operation, so entries must
   not be reordered or removed.  The table is copied into each new matrix
   by MatCreate_MPIAIJ() (and MatDuplicate_MPIAIJ()) below.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
/* the TFS parallel LU is only available for real double-precision builds */
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};
1492 
1493 /* ----------------------------------------------------------------------------------------*/
1494 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/*
   MatStoreValues_MPIAIJ - Stashes a copy of the current numerical values of
   both sequential blocks (diagonal A and off-diagonal B) so a later
   MatRetrieveValues() can restore them.  Registered as a dynamic composed
   function, hence the C linkage.
*/
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1509 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/*
   MatRetrieveValues_MPIAIJ - Restores the numerical values of both
   sequential blocks previously saved by MatStoreValues().  Registered as a
   dynamic composed function, hence the C linkage.
*/
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1524 
1525 #include "petscpc.h"
1526 EXTERN_C_BEGIN
1527 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1528 EXTERN_C_END
1529 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Type constructor for MATMPIAIJ, invoked by
   MatSetType().

   Installs the MPIAIJ function table, splits the global sizes over the
   communicator, builds per-process row/column ownership tables, creates
   the off-process value stash, and registers the composed functions
   (store/retrieve values, get diagonal block).  The actual A/B block
   matrices are created later by the preallocation routine.
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* fill in whichever of the local/global sizes were left PETSC_DECIDE */
  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation holds both tables: rowners first, cowners after it */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every process's local row count into rowners[1..size], then
     prefix-sum in place (starting at i=2 since rowners[1] already holds
     the first count) so rowners[rank]..rowners[rank+1] bracket each
     process's rows */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same construction for the column ownership table */
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
1609 
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1612 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1613 {
1614   Mat        mat;
1615   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1616   int        ierr;
1617 
1618   PetscFunctionBegin;
1619   *newmat       = 0;
1620   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1621   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1622   a    = (Mat_MPIAIJ*)mat->data;
1623   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1624   mat->factor       = matin->factor;
1625   mat->assembled    = PETSC_TRUE;
1626   mat->insertmode   = NOT_SET_VALUES;
1627   mat->preallocated = PETSC_TRUE;
1628 
1629   a->rstart       = oldmat->rstart;
1630   a->rend         = oldmat->rend;
1631   a->cstart       = oldmat->cstart;
1632   a->cend         = oldmat->cend;
1633   a->size         = oldmat->size;
1634   a->rank         = oldmat->rank;
1635   a->donotstash   = oldmat->donotstash;
1636   a->roworiented  = oldmat->roworiented;
1637   a->rowindices   = 0;
1638   a->rowvalues    = 0;
1639   a->getrowactive = PETSC_FALSE;
1640 
1641   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1642   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1643   if (oldmat->colmap) {
1644 #if defined (PETSC_USE_CTABLE)
1645     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1646 #else
1647     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1648     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1649     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1650 #endif
1651   } else a->colmap = 0;
1652   if (oldmat->garray) {
1653     int len;
1654     len  = oldmat->B->n;
1655     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1656     PetscLogObjectMemory(mat,len*sizeof(int));
1657     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1658   } else a->garray = 0;
1659 
1660   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1661   PetscLogObjectParent(mat,a->lvec);
1662   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1663   PetscLogObjectParent(mat,a->Mvctx);
1664   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1665   PetscLogObjectParent(mat,a->A);
1666   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1667   PetscLogObjectParent(mat,a->B);
1668   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1669   *newmat = mat;
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #include "petscsys.h"
1674 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
    Loads an MPIAIJ matrix from a PETSc binary viewer.  Process 0 alone
  reads the file (header, row lengths, column indices, numerical values)
  and ships each other process its share of the rows; every process then
  inserts its own rows with MatSetValues() and the matrix is assembled.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; it validates the header first */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  /* broadcast global sizes M, N (and nz count) to every process */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* turn the per-process counts into a prefix sum of row offsets */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* ourlens and offlens share one allocation; offlens is the second half */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity check: the message must hold exactly the expected count */
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* temporarily shrink ourlens to the diagonal-block counts for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  /* restore the full per-row lengths used when inserting below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1859 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat          *local,M,Mreuse;
  PetscScalar  *vwork,*aa;
  MPI_Comm     comm = mat->comm;
  Mat_SeqAIJ   *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* reuse path: recover the sequential submatrix saved (composed under the
     name "SubMatrix") by a previous MAT_INITIAL_MATRIX call and refill it */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    /* dlens and olens share one allocation; olens is the second half */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy the sequential submatrix row by row into the parallel result */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1981 /*@C
1982    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1983    (the default parallel PETSc format).  For good matrix assembly performance
1984    the user should preallocate the matrix storage by setting the parameters
1985    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1986    performance can be increased by more than a factor of 50.
1987 
1988    Collective on MPI_Comm
1989 
1990    Input Parameters:
1991 +  A - the matrix
1992 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1993            (same value is used for all local rows)
1994 .  d_nnz - array containing the number of nonzeros in the various rows of the
1995            DIAGONAL portion of the local submatrix (possibly different for each row)
1996            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1997            The size of this array is equal to the number of local rows, i.e 'm'.
1998            You must leave room for the diagonal entry even if it is zero.
1999 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2000            submatrix (same value is used for all local rows).
2001 -  o_nnz - array containing the number of nonzeros in the various rows of the
2002            OFF-DIAGONAL portion of the local submatrix (possibly different for
2003            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2004            structure. The size of this array is equal to the number
2005            of local rows, i.e 'm'.
2006 
2007    The AIJ format (also called the Yale sparse matrix format or
2008    compressed row storage), is fully compatible with standard Fortran 77
2009    storage.  That is, the stored row and column indices can begin at
2010    either one (as in Fortran) or zero.  See the users manual for details.
2011 
2012    The user MUST specify either the local or global matrix dimensions
2013    (possibly both).
2014 
2015    The parallel matrix is partitioned such that the first m0 rows belong to
2016    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2017    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2018 
2019    The DIAGONAL portion of the local submatrix of a processor can be defined
2020    as the submatrix which is obtained by extraction the part corresponding
2021    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2022    first row that belongs to the processor, and r2 is the last row belonging
2023    to the this processor. This is a square mxm matrix. The remaining portion
2024    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2025 
2026    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2027 
2028    By default, this format uses inodes (identical nodes) when possible.
2029    We search for consecutive rows with the same nonzero structure, thereby
2030    reusing matrix information to achieve increased efficiency.
2031 
2032    Options Database Keys:
2033 +  -mat_aij_no_inode  - Do not use inodes
2034 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2035 -  -mat_aij_oneindex - Internally use indexing starting at 1
2036         rather than 0.  Note that when calling MatSetValues(),
2037         the user still MUST index entries starting at 0!
2038 
2039    Example usage:
2040 
2041    Consider the following 8x8 matrix with 34 non-zero values, that is
2042    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2043    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2044    as follows:
2045 
2046 .vb
2047             1  2  0  |  0  3  0  |  0  4
2048     Proc0   0  5  6  |  7  0  0  |  8  0
2049             9  0 10  | 11  0  0  | 12  0
2050     -------------------------------------
2051            13  0 14  | 15 16 17  |  0  0
2052     Proc1   0 18  0  | 19 20 21  |  0  0
2053             0  0  0  | 22 23  0  | 24  0
2054     -------------------------------------
2055     Proc2  25 26 27  |  0  0 28  | 29  0
2056            30  0  0  | 31 32 33  |  0 34
2057 .ve
2058 
2059    This can be represented as a collection of submatrices as:
2060 
2061 .vb
2062       A B C
2063       D E F
2064       G H I
2065 .ve
2066 
2067    Where the submatrices A,B,C are owned by proc0, D,E,F are
2068    owned by proc1, G,H,I are owned by proc2.
2069 
2070    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2071    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2072    The 'M','N' parameters are 8,8, and have the same values on all procs.
2073 
2074    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2075    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2076    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2077    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2080 
2081    When d_nz, o_nz parameters are specified, d_nz storage elements are
2082    allocated for every row of the local diagonal submatrix, and o_nz
2083    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
2085    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2086    In this case, the values of d_nz,o_nz are:
2087 .vb
2088      proc0 : dnz = 2, o_nz = 2
2089      proc1 : dnz = 3, o_nz = 2
2090      proc2 : dnz = 1, o_nz = 4
2091 .ve
2092    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2093    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
2095    34 values.
2096 
2097    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2099    In the above case the values for d_nnz,o_nnz are:
2100 .vb
2101      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2102      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2103      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2104 .ve
2105    Here the space allocated is sum of all the above values i.e 34, and
2106    hence pre-allocation is perfect.
2107 
2108    Level: intermediate
2109 
2110 .keywords: matrix, aij, compressed row, sparse, parallel
2111 
2112 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2113 @*/
2114 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2115 {
2116   Mat_MPIAIJ   *b;
2117   int          ierr,i;
2118   PetscTruth   flg2;
2119 
2120   PetscFunctionBegin;
2121   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2122   if (!flg2) PetscFunctionReturn(0);
2123   B->preallocated = PETSC_TRUE;
2124   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2125   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2126   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2127   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2128   if (d_nnz) {
2129     for (i=0; i<B->m; i++) {
2130       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2131     }
2132   }
2133   if (o_nnz) {
2134     for (i=0; i<B->m; i++) {
2135       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2136     }
2137   }
2138   b = (Mat_MPIAIJ*)B->data;
2139 
2140   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2141   PetscLogObjectParent(B,b->A);
2142   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2143   PetscLogObjectParent(B,b->B);
2144 
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatCreateMPIAIJ"
2150 /*@C
2151    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2152    (the default parallel PETSc format).  For good matrix assembly performance
2153    the user should preallocate the matrix storage by setting the parameters
2154    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2155    performance can be increased by more than a factor of 50.
2156 
2157    Collective on MPI_Comm
2158 
2159    Input Parameters:
2160 +  comm - MPI communicator
2161 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2162            This value should be the same as the local size used in creating the
2163            y vector for the matrix-vector product y = Ax.
2164 .  n - This value should be the same as the local size used in creating the
2165        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2166        calculated if N is given) For square matrices n is almost always m.
2167 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2168 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2169 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2170            (same value is used for all local rows)
2171 .  d_nnz - array containing the number of nonzeros in the various rows of the
2172            DIAGONAL portion of the local submatrix (possibly different for each row)
2173            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2174            The size of this array is equal to the number of local rows, i.e 'm'.
2175            You must leave room for the diagonal entry even if it is zero.
2176 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2177            submatrix (same value is used for all local rows).
2178 -  o_nnz - array containing the number of nonzeros in the various rows of the
2179            OFF-DIAGONAL portion of the local submatrix (possibly different for
2180            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2181            structure. The size of this array is equal to the number
2182            of local rows, i.e 'm'.
2183 
2184    Output Parameter:
2185 .  A - the matrix
2186 
2187    Notes:
2188    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2189    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2190    storage requirements for this matrix.
2191 
2192    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2193    processor than it must be used on all processors that share the object for
2194    that argument.
2195 
2196    The AIJ format (also called the Yale sparse matrix format or
2197    compressed row storage), is fully compatible with standard Fortran 77
2198    storage.  That is, the stored row and column indices can begin at
2199    either one (as in Fortran) or zero.  See the users manual for details.
2200 
2201    The user MUST specify either the local or global matrix dimensions
2202    (possibly both).
2203 
2204    The parallel matrix is partitioned such that the first m0 rows belong to
2205    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2206    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2207 
2208    The DIAGONAL portion of the local submatrix of a processor can be defined
2209    as the submatrix which is obtained by extraction the part corresponding
2210    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2211    first row that belongs to the processor, and r2 is the last row belonging
2212    to the this processor. This is a square mxm matrix. The remaining portion
2213    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2214 
2215    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2216 
2217    By default, this format uses inodes (identical nodes) when possible.
2218    We search for consecutive rows with the same nonzero structure, thereby
2219    reusing matrix information to achieve increased efficiency.
2220 
2221    Options Database Keys:
2222 +  -mat_aij_no_inode  - Do not use inodes
2223 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2224 -  -mat_aij_oneindex - Internally use indexing starting at 1
2225         rather than 0.  Note that when calling MatSetValues(),
2226         the user still MUST index entries starting at 0!
2227 
2228 
2229    Example usage:
2230 
2231    Consider the following 8x8 matrix with 34 non-zero values, that is
2232    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2233    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2234    as follows:
2235 
2236 .vb
2237             1  2  0  |  0  3  0  |  0  4
2238     Proc0   0  5  6  |  7  0  0  |  8  0
2239             9  0 10  | 11  0  0  | 12  0
2240     -------------------------------------
2241            13  0 14  | 15 16 17  |  0  0
2242     Proc1   0 18  0  | 19 20 21  |  0  0
2243             0  0  0  | 22 23  0  | 24  0
2244     -------------------------------------
2245     Proc2  25 26 27  |  0  0 28  | 29  0
2246            30  0  0  | 31 32 33  |  0 34
2247 .ve
2248 
2249    This can be represented as a collection of submatrices as:
2250 
2251 .vb
2252       A B C
2253       D E F
2254       G H I
2255 .ve
2256 
2257    Where the submatrices A,B,C are owned by proc0, D,E,F are
2258    owned by proc1, G,H,I are owned by proc2.
2259 
2260    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2261    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2262    The 'M','N' parameters are 8,8, and have the same values on all procs.
2263 
2264    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2265    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2266    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2267    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2270 
2271    When d_nz, o_nz parameters are specified, d_nz storage elements are
2272    allocated for every row of the local diagonal submatrix, and o_nz
2273    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
2275    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2276    In this case, the values of d_nz,o_nz are:
2277 .vb
2278      proc0 : dnz = 2, o_nz = 2
2279      proc1 : dnz = 3, o_nz = 2
2280      proc2 : dnz = 1, o_nz = 4
2281 .ve
2282    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2283    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
2285    34 values.
2286 
2287    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2289    In the above case the values for d_nnz,o_nnz are:
2290 .vb
2291      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2292      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2293      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2294 .ve
2295    Here the space allocated is sum of all the above values i.e 34, and
2296    hence pre-allocation is perfect.
2297 
2298    Level: intermediate
2299 
2300 .keywords: matrix, aij, compressed row, sparse, parallel
2301 
2302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2303 @*/
2304 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2305 {
2306   int ierr,size;
2307 
2308   PetscFunctionBegin;
2309   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2310   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2311   if (size > 1) {
2312     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2313     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2314   } else {
2315     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2316     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2317   }
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2323 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2324 {
2325   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2326   PetscFunctionBegin;
2327   *Ad     = a->A;
2328   *Ao     = a->B;
2329   *colmap = a->garray;
2330   PetscFunctionReturn(0);
2331 }
2332 
2333 #undef __FUNCT__
2334 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2335 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2336 {
2337   int        ierr;
2338   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2339 
2340   PetscFunctionBegin;
2341   if (coloring->ctype == IS_COLORING_LOCAL) {
2342     int        *allcolors,*colors,i;
2343     ISColoring ocoloring;
2344 
2345     /* set coloring for diagonal portion */
2346     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2347 
2348     /* set coloring for off-diagonal portion */
2349     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2350     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2351     for (i=0; i<a->B->n; i++) {
2352       colors[i] = allcolors[a->garray[i]];
2353     }
2354     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2355     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2356     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2357     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2358   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2359     int        *colors,i,*larray;
2360     ISColoring ocoloring;
2361 
2362     /* set coloring for diagonal portion */
2363     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2364     for (i=0; i<a->A->n; i++) {
2365       larray[i] = i + a->cstart;
2366     }
2367     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2368     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2369     for (i=0; i<a->A->n; i++) {
2370       colors[i] = coloring->colors[larray[i]];
2371     }
2372     ierr = PetscFree(larray);CHKERRQ(ierr);
2373     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2374     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2375     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2376 
2377     /* set coloring for off-diagonal portion */
2378     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2379     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2380     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2381     for (i=0; i<a->B->n; i++) {
2382       colors[i] = coloring->colors[larray[i]];
2383     }
2384     ierr = PetscFree(larray);CHKERRQ(ierr);
2385     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2386     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2387     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2388   } else {
2389     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2390   }
2391 
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2397 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2398 {
2399   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2400   int        ierr;
2401 
2402   PetscFunctionBegin;
2403   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2404   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2410 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2411 {
2412   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2413   int        ierr;
2414 
2415   PetscFunctionBegin;
2416   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2417   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatFileMerge"
2423 /*@C
2424       MatFileMerge - Saves a single large PETSc matrix by reading in
2425             sequential matrices from each processor and concatinating them
2426             together.
2427 
2428     Collective on MPI_Comm
2429 
2430    Input Parameters:
2431 +    comm - the communicators the individual files are on
2432 .    infile - name of input file
2433 -    outfile - name of output file
2434 
2435    Notes: The number of columns of the matrix in EACH of the seperate files
2436       MUST be the same.
2437 
2438           The true name of the input files on each processor is obtained by
2439       appending .rank onto the infile string; where rank is 0 up to one less
2440       then the number of processors
2441 @*/
2442 int MatFileMerge(MPI_Comm comm,char *infile,char *outfile)
2443 {
2444   int         ierr,rank,len,m,n,i,rstart,*indx,nnz,I;
2445   PetscViewer in,out;
2446   char        *name;
2447   Mat         A,B;
2448   PetscScalar *values;
2449 
2450   PetscFunctionBegin;
2451   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2452   ierr = PetscStrlen(infile,&len);CHKERRQ(ierr);
2453   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2454   sprintf(name,"%s.%d",infile,rank);
2455   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_RDONLY,&in);CHKERRQ(ierr);
2456   ierr = PetscFree(name);
2457   ierr = MatLoad(in,MATSEQAIJ,&A);CHKERRQ(ierr);
2458   ierr = PetscViewerDestroy(in);CHKERRQ(ierr);
2459 
2460   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
2461   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DETERMINE,n,0,0,0,0,&B);CHKERRQ(ierr);
2462   ierr = MatGetOwnershipRange(B,&rstart,0);CHKERRQ(ierr);
2463   for (i=0;i<m;i++) {
2464     ierr = MatGetRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr);
2465     I    = i + rstart;
2466     ierr = MatSetValues(B,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2467     ierr = MatRestoreRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr);
2468   }
2469   ierr = MatDestroy(A);CHKERRQ(ierr);
2470   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2471   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2472 
2473   ierr = PetscViewerBinaryOpen(comm,outfile,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2474   ierr = MatView(B,out);CHKERRQ(ierr);
2475   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2476   ierr = MatDestroy(B);CHKERRQ(ierr);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatFileSplit"
2482 int MatFileSplit(Mat A,char *outfile)
2483 {
2484   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2485   PetscViewer out;
2486   char        *name;
2487   Mat         B;
2488   PetscScalar *values;
2489 
2490   PetscFunctionBegin;
2491 
2492   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2493   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2494   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2495   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2496   for (i=0;i<m;i++) {
2497     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2498     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2499     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2500   }
2501   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2502   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2503 
2504   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2505   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2506   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2507   sprintf(name,"%s.%d",outfile,rank);
2508   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2509   ierr = PetscFree(name);
2510   ierr = MatView(B,out);CHKERRQ(ierr);
2511   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2512   ierr = MatDestroy(B);CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
2515